diff --git a/pom.xml b/pom.xml index 5b58e3e84cf..1d62de4e541 100644 --- a/pom.xml +++ b/pom.xml @@ -91,6 +91,12 @@ ${lucene.version} compile + + org.apache.lucene + lucene-suggest + ${lucene.version} + compile + diff --git a/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java b/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java index 6673f5e811c..0003ae465e1 100644 --- a/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java +++ b/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java @@ -37,6 +37,7 @@ import org.elasticsearch.search.facet.AbstractFacetBuilder; import org.elasticsearch.search.highlight.HighlightBuilder; import org.elasticsearch.search.sort.SortBuilder; import org.elasticsearch.search.sort.SortOrder; +import org.elasticsearch.search.suggest.SuggestBuilder; import java.util.Map; @@ -646,6 +647,22 @@ public class SearchRequestBuilder extends ActionRequestBuilder indexBoost = null; private String[] stats; @@ -400,6 +403,13 @@ public class SearchSourceBuilder implements ToXContent { return this; } + public SuggestBuilder suggest() { + if (suggestBuilder == null) { + suggestBuilder = new SuggestBuilder(); + } + return suggestBuilder; + } + /** * Sets no fields to be loaded, resulting in only id and type to be returned per field. 
*/ @@ -709,6 +719,10 @@ public class SearchSourceBuilder implements ToXContent { highlightBuilder.toXContent(builder, params); } + if (suggestBuilder != null) { + suggestBuilder.toXContent(builder, params); + } + if (stats != null) { builder.startArray("stats"); for (String stat : stats) { diff --git a/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java b/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java index 2a38084e214..6363e7f413b 100644 --- a/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java +++ b/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java @@ -48,6 +48,7 @@ import org.elasticsearch.search.internal.InternalSearchHits; import org.elasticsearch.search.internal.InternalSearchResponse; import org.elasticsearch.search.query.QuerySearchResult; import org.elasticsearch.search.query.QuerySearchResultProvider; +import org.elasticsearch.search.suggest.Suggest; import java.util.ArrayList; import java.util.Collection; @@ -373,7 +374,38 @@ public class SearchPhaseController extends AbstractComponent { } } + // merge suggest results + Suggest suggest = null; + if (!queryResults.isEmpty()) { + List mergedSuggestions = null; + for (QuerySearchResultProvider resultProvider : queryResults.values()) { + Suggest shardResult = resultProvider.queryResult().suggest(); + if (shardResult == null) { + continue; + } + + if (mergedSuggestions == null) { + mergedSuggestions = shardResult.getSuggestions(); + continue; + } + + for (Suggest.Suggestion shardCommand : shardResult.getSuggestions()) { + for (Suggest.Suggestion mergedSuggestion : mergedSuggestions) { + if (mergedSuggestion.getName().equals(shardCommand.getName())) { + mergedSuggestion.reduce(shardCommand); + } + } + } + } + if (mergedSuggestions != null) { + suggest = new Suggest(mergedSuggestions); + for (Suggest.Suggestion suggestion : mergedSuggestions) { + suggestion.trim(); + } + } + } + InternalSearchHits 
searchHits = new InternalSearchHits(hits.toArray(new InternalSearchHit[hits.size()]), totalHits, maxScore); - return new InternalSearchResponse(searchHits, facets, timedOut); + return new InternalSearchResponse(searchHits, facets, suggest, timedOut); } } diff --git a/src/main/java/org/elasticsearch/search/internal/InternalSearchResponse.java b/src/main/java/org/elasticsearch/search/internal/InternalSearchResponse.java index 75c705a0e56..e3ffc1fbe0c 100644 --- a/src/main/java/org/elasticsearch/search/internal/InternalSearchResponse.java +++ b/src/main/java/org/elasticsearch/search/internal/InternalSearchResponse.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.facet.Facets; import org.elasticsearch.search.facet.InternalFacets; +import org.elasticsearch.search.suggest.Suggest; import java.io.IOException; @@ -41,16 +42,19 @@ public class InternalSearchResponse implements Streamable, ToXContent { private InternalFacets facets; + private Suggest suggest; + private boolean timedOut; - public static final InternalSearchResponse EMPTY = new InternalSearchResponse(new InternalSearchHits(new InternalSearchHit[0], 0, 0), null, false); + public static final InternalSearchResponse EMPTY = new InternalSearchResponse(new InternalSearchHits(new InternalSearchHit[0], 0, 0), null, null, false); private InternalSearchResponse() { } - public InternalSearchResponse(InternalSearchHits hits, InternalFacets facets, boolean timedOut) { + public InternalSearchResponse(InternalSearchHits hits, InternalFacets facets, Suggest suggest, boolean timedOut) { this.hits = hits; this.facets = facets; + this.suggest = suggest; this.timedOut = timedOut; } @@ -66,12 +70,19 @@ public class InternalSearchResponse implements Streamable, ToXContent { return facets; } + public Suggest suggest() { + return suggest; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) 
throws IOException { hits.toXContent(builder, params); if (facets != null) { facets.toXContent(builder, params); } + if (suggest != null) { + suggest.toXContent(builder, params); + } return builder; } @@ -87,6 +98,9 @@ public class InternalSearchResponse implements Streamable, ToXContent { if (in.readBoolean()) { facets = InternalFacets.readFacets(in); } + if (in.readBoolean()) { + suggest = Suggest.readSuggest(in); + } timedOut = in.readBoolean(); } @@ -99,6 +113,12 @@ public class InternalSearchResponse implements Streamable, ToXContent { out.writeBoolean(true); facets.writeTo(out); } + if (suggest == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + suggest.writeTo(out); + } out.writeBoolean(timedOut); } } diff --git a/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/src/main/java/org/elasticsearch/search/internal/SearchContext.java index e535b8ede33..3f6d88fb7ab 100644 --- a/src/main/java/org/elasticsearch/search/internal/SearchContext.java +++ b/src/main/java/org/elasticsearch/search/internal/SearchContext.java @@ -60,6 +60,7 @@ import org.elasticsearch.search.highlight.SearchContextHighlight; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.query.QuerySearchResult; import org.elasticsearch.search.scan.ScanContext; +import org.elasticsearch.search.suggest.SuggestionSearchContext; import java.util.ArrayList; import java.util.HashMap; @@ -160,6 +161,8 @@ public class SearchContext implements Releasable { private SearchContextHighlight highlight; + private SuggestionSearchContext suggest; + private SearchLookup searchLookup; private boolean queryRewritten; @@ -301,6 +304,14 @@ public class SearchContext implements Releasable { this.highlight = highlight; } + public SuggestionSearchContext suggest() { + return suggest; + } + + public void suggest(SuggestionSearchContext suggest) { + this.suggest = suggest; + } + public boolean hasScriptFields() { return scriptFields != null; } diff 
--git a/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/src/main/java/org/elasticsearch/search/query/QueryPhase.java index 1ff009b4882..61068ec8083 100644 --- a/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ b/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -35,6 +35,7 @@ import org.elasticsearch.search.internal.ScopePhase; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.sort.SortParseElement; import org.elasticsearch.search.sort.TrackScoresParseElement; +import org.elasticsearch.search.suggest.SuggestPhase; import java.util.Map; @@ -44,10 +45,12 @@ import java.util.Map; public class QueryPhase implements SearchPhase { private final FacetPhase facetPhase; + private final SuggestPhase suggestPhase; @Inject - public QueryPhase(FacetPhase facetPhase) { + public QueryPhase(FacetPhase facetPhase, SuggestPhase suggestPhase) { this.facetPhase = facetPhase; + this.suggestPhase = suggestPhase; } @Override @@ -68,7 +71,8 @@ public class QueryPhase implements SearchPhase { .put("min_score", new MinScoreParseElement()) .put("minScore", new MinScoreParseElement()) .put("timeout", new TimeoutParseElement()) - .putAll(facetPhase.parseElements()); + .putAll(facetPhase.parseElements()) + .putAll(suggestPhase.parseElements()); return parseElements.build(); } @@ -185,6 +189,7 @@ public class QueryPhase implements SearchPhase { searchContext.searcher().processedScope(); } + suggestPhase.execute(searchContext); facetPhase.execute(searchContext); } } diff --git a/src/main/java/org/elasticsearch/search/query/QuerySearchResult.java b/src/main/java/org/elasticsearch/search/query/QuerySearchResult.java index 6ed9ae81fa5..decf5dca374 100644 --- a/src/main/java/org/elasticsearch/search/query/QuerySearchResult.java +++ b/src/main/java/org/elasticsearch/search/query/QuerySearchResult.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.io.stream.StreamOutput; import 
org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.facet.Facets; import org.elasticsearch.search.facet.InternalFacets; +import org.elasticsearch.search.suggest.Suggest; import org.elasticsearch.transport.TransportResponse; import java.io.IOException; @@ -43,6 +44,7 @@ public class QuerySearchResult extends TransportResponse implements QuerySearchR private int size; private TopDocs topDocs; private InternalFacets facets; + private Suggest suggest; private boolean searchTimedOut; public QuerySearchResult() { @@ -101,6 +103,14 @@ public class QuerySearchResult extends TransportResponse implements QuerySearchR this.facets = facets; } + public Suggest suggest() { + return suggest; + } + + public void suggest(Suggest suggest) { + this.suggest = suggest; + } + public int from() { return from; } @@ -136,6 +146,9 @@ public class QuerySearchResult extends TransportResponse implements QuerySearchR if (in.readBoolean()) { facets = InternalFacets.readFacets(in); } + if (in.readBoolean()) { + suggest = Suggest.readSuggest(in); + } searchTimedOut = in.readBoolean(); } @@ -153,6 +166,12 @@ public class QuerySearchResult extends TransportResponse implements QuerySearchR out.writeBoolean(true); facets.writeTo(out); } + if (suggest == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + suggest.writeTo(out); + } out.writeBoolean(searchTimedOut); } } diff --git a/src/main/java/org/elasticsearch/search/suggest/Suggest.java b/src/main/java/org/elasticsearch/search/suggest/Suggest.java new file mode 100644 index 00000000000..ab594ca97f6 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/suggest/Suggest.java @@ -0,0 +1,509 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.suggest; + +import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Streamable; +import org.elasticsearch.common.text.Text; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; + +import java.io.IOException; +import java.util.*; + +/** + * Top level suggest result, containing the result for each suggestion. 
+ */ +public class Suggest implements Iterable, Streamable, ToXContent { + + static class Fields { + + static final XContentBuilderString SUGGEST = new XContentBuilderString("suggest"); + + } + + private List suggestions; + + Suggest() { + } + + public Suggest(List suggestions) { + this.suggestions = suggestions; + } + + /** + * @return the suggestions + */ + public List getSuggestions() { + return suggestions; + } + + @Override + public Iterator iterator() { + return suggestions.iterator(); + } + + @Override + public void readFrom(StreamInput in) throws IOException { + int size = in.readVInt(); + suggestions = new ArrayList(size); + for (int i = 0; i < size; i++) { + Suggestion suggestion = new Suggestion(); + suggestion.readFrom(in); + suggestions.add(suggestion); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(suggestions.size()); + for (Suggestion command : suggestions) { + command.writeTo(out); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(Fields.SUGGEST); + for (Suggestion suggestion : suggestions) { + suggestion.toXContent(builder, params); + } + builder.endObject(); + return null; + } + + public static Suggest readSuggest(StreamInput in) throws IOException { + Suggest result = new Suggest(); + result.readFrom(in); + return result; + } + + /** + * The suggestion responses corresponding with the suggestions in the request. 
+ */ + public static class Suggestion implements Streamable, ToXContent { + + static class Fields { + + static final XContentBuilderString TERMS = new XContentBuilderString("terms"); + + } + + private String name; + private int size; + private Sort sort; + private final List terms = new ArrayList(5); + + Suggestion() { + } + + Suggestion(String name, int size, Sort sort) { + this.name = name; + this.size = size; // The suggested term size specified in request, only used for merging shard responses + this.sort = sort; + } + + void addTerm(Term term) { + terms.add(term); + } + + /** + * @return The terms outputted by the suggest analyzer using the suggested text. Embeds the actual suggested + * terms. + */ + public List getTerms() { + return terms; + } + + /** + * @return The name of the suggestion as is defined in the request. + */ + public String getName() { + return name; + } + + /** + * Merges the result of another suggestion into this suggestion. + */ + public void reduce(Suggestion other) { + assert name.equals(other.name); + assert terms.size() == other.terms.size(); + for (int i = 0; i < terms.size(); i++) { + Term thisTerm = terms.get(i); + Term otherTerm = other.terms.get(i); + thisTerm.reduce(otherTerm, sort); + } + } + + /** + * Trims the number of suggestions per suggest text term to the requested size. 
+ */ + public void trim() { + for (Term term : terms) { + term.trim(size); + } + } + + @Override + public void readFrom(StreamInput in) throws IOException { + name = in.readString(); + size = in.readVInt(); + sort = Sort.fromId(in.readByte()); + int size = in.readVInt(); + terms.clear(); + for (int i = 0; i < size; i++) { + terms.add(Term.read(in)); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(name); + out.writeVInt(size); + out.writeByte(sort.id()); + out.writeVInt(terms.size()); + for (Term term : terms) { + term.writeTo(out); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(name); + builder.startArray(Fields.TERMS); + for (Term term : terms) { + term.toXContent(builder, params); + } + builder.endArray(); + builder.endObject(); + return builder; + } + + + /** + * Represents a term from the suggest text, that contains the term, start/end offsets and zero or more suggested + * terms for this term in the suggested text. 
+ */ + public static class Term implements Streamable, ToXContent { + + static class Fields { + + static final XContentBuilderString TERM = new XContentBuilderString("term"); + static final XContentBuilderString SUGGESTIONS = new XContentBuilderString("suggestions"); + static final XContentBuilderString START_OFFSET = new XContentBuilderString("start_offset"); + static final XContentBuilderString END_OFFSET = new XContentBuilderString("end_offset"); + + } + + private Text term; + private int startOffset; + private int endOffset; + + private List suggested; + + public Term(Text term, int startOffset, int endOffset) { + this.term = term; + this.startOffset = startOffset; + this.endOffset = endOffset; + this.suggested = new ArrayList(5); + } + + Term() { + } + + void addSuggested(SuggestedTerm suggestedTerm) { + suggested.add(suggestedTerm); + } + + void reduce(Term otherTerm, Sort sort) { + assert term.equals(otherTerm.term()); + assert startOffset == otherTerm.startOffset; + assert endOffset == otherTerm.endOffset; + + for (SuggestedTerm otherSuggestedTerm : otherTerm.suggested) { + int index = suggested.indexOf(otherSuggestedTerm); + if (index >= 0) { + SuggestedTerm thisSuggestedTerm = suggested.get(index); + thisSuggestedTerm.setFrequency(thisSuggestedTerm.frequency + otherSuggestedTerm.frequency); + } else { + suggested.add(otherSuggestedTerm); + } + } + + Comparator comparator; + switch (sort) { + case SCORE: + comparator = SuggestPhase.SCORE; + break; + case FREQUENCY: + comparator = SuggestPhase.FREQUENCY; + break; + default: + throw new ElasticSearchException("Could not resolve comparator in reduce phase."); + } + Collections.sort(suggested, comparator); + } + + public Text term() { + return term; + } + + /** + * @return the term (analyzed by suggest analyzer) originating from the suggest text. + */ + public String getTerm() { + return term().string(); + } + + /** + * @return the start offset of this term in the suggest text. 
+ */ + public int getStartOffset() { + return startOffset; + } + + /** + * @return the end offset of this term in the suggest text. + */ + public int getEndOffset() { + return endOffset; + } + + /** + * @return The suggested terms for this particular suggest text term. If there are no suggested terms then + * an empty list is returned. + */ + public List getSuggested() { + return suggested; + } + + void trim(int size) { + int suggestionsToRemove = Math.max(0, suggested.size() - size); + for (int i = 0; i < suggestionsToRemove; i++) { + suggested.remove(suggested.size() - 1); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + Term term = (Term) o; + + if (endOffset != term.endOffset) return false; + if (startOffset != term.startOffset) return false; + if (!this.term.equals(term.term)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = term.hashCode(); + result = 31 * result + startOffset; + result = 31 * result + endOffset; + return result; + } + + static Term read(StreamInput in) throws IOException { + Term term = new Term(); + term.readFrom(in); + return term; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + term = in.readText(); + startOffset = in.readVInt(); + endOffset = in.readVInt(); + int suggestedWords = in.readVInt(); + suggested = new ArrayList(suggestedWords); + for (int j = 0; j < suggestedWords; j++) { + suggested.add(SuggestedTerm.create(in)); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeText(term); + out.writeVInt(startOffset); + out.writeVInt(endOffset); + out.writeVInt(suggested.size()); + for (SuggestedTerm suggestedTerm : suggested) { + suggestedTerm.writeTo(out); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + 
builder.field(Fields.TERM, term); + builder.field(Fields.START_OFFSET, startOffset); + builder.field(Fields.END_OFFSET, endOffset); + builder.startArray(Fields.SUGGESTIONS); + for (SuggestedTerm suggestedTerm : suggested) { + suggestedTerm.toXContent(builder, params); + } + builder.endArray(); + builder.endObject(); + return builder; + } + + /** + * Represents the suggested term, containing a term and its document frequency and score. + */ + public static class SuggestedTerm implements Streamable, ToXContent { + + static class Fields { + + static final XContentBuilderString TERM = new XContentBuilderString("term"); + static final XContentBuilderString FREQUENCY = new XContentBuilderString("frequency"); + static final XContentBuilderString SCORE = new XContentBuilderString("score"); + + } + + private Text term; + private int frequency; + private float score; + + SuggestedTerm(Text term, int frequency, float score) { + this.term = term; + this.frequency = frequency; + this.score = score; + } + + SuggestedTerm() { + } + + public void setFrequency(int frequency) { + this.frequency = frequency; + } + + /** + * @return The actual term. + */ + public Text getTerm() { + return term; + } + + /** + * @return How often this suggested term appears in the index. + */ + public int getFrequency() { + return frequency; + } + + /** + * @return The score based on the edit distance difference between the suggested term and the + * term in the suggest text. 
+ */ + public float getScore() { + return score; + } + + static SuggestedTerm create(StreamInput in) throws IOException { + SuggestedTerm suggestion = new SuggestedTerm(); + suggestion.readFrom(in); + return suggestion; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + term = in.readText(); + frequency = in.readVInt(); + score = in.readFloat(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeText(term); + out.writeVInt(frequency); + out.writeFloat(score); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(Fields.TERM, term); + builder.field(Fields.FREQUENCY, frequency); + builder.field(Fields.SCORE, score); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + SuggestedTerm that = (SuggestedTerm) o; + return term.equals(that.term); + + } + + @Override + public int hashCode() { + return term.hashCode(); + } + } + + } + + enum Sort { + + /** + * Sort should first be based on score. + */ + SCORE((byte) 0x0), + + /** + * Sort should first be based on document frequency. 
+ */ + FREQUENCY((byte) 0x1); + + private byte id; + + private Sort(byte id) { + this.id = id; + } + + public byte id() { + return id; + } + + static Sort fromId(byte id) { + if (id == 0) { + return SCORE; + } else if (id == 1) { + return FREQUENCY; + } else { + throw new ElasticSearchException("Illegal suggest sort " + id); + } + } + + } + + } + +} diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java b/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java new file mode 100644 index 00000000000..55f8213078a --- /dev/null +++ b/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java @@ -0,0 +1,383 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.suggest; + +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Defines how to perform suggesting. 
This builder allows a number of global options to be specified and + * an arbitrary number of {@link org.elasticsearch.search.suggest.SuggestBuilder.FuzzySuggestion} instances. + *

+ * Suggesting works by suggesting terms that appear in the suggest text that are similar compared to the terms in + * provided text. These spelling suggestions are based on several options described in this class. + */ +public class SuggestBuilder implements ToXContent { + + private String globalText; + + private final List suggestions = new ArrayList(); + + /** + * Sets the text to provide suggestions for. The suggest text is a required option that needs + * to be set either via this setter or via the {@link org.elasticsearch.search.suggest.SuggestBuilder.Suggestion#setText(String)} method. + *

+ * The suggest text gets analyzed by the suggest analyzer or the suggest field search analyzer. + * For each analyzed token, suggested terms are suggested if possible. + */ + public SuggestBuilder setText(String globalText) { + this.globalText = globalText; + return this; + } + + /** + * Adds an {@link org.elasticsearch.search.suggest.SuggestBuilder.FuzzySuggestion} instance under a user defined name. + * The order in which the Suggestions are added, is the same as in the response. + */ + public SuggestBuilder addSuggestion(Suggestion suggestion) { + suggestions.add(suggestion); + return this; + } + + /** + * Returns all suggestions with the defined names. + */ + public List getSuggestion() { + return suggestions; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("suggest"); + if (globalText != null) { + builder.field("text", globalText); + } + + builder.startObject("suggestions"); + for (Suggestion suggestion : suggestions) { + builder = suggestion.toXContent(builder, params); + } + builder.endObject(); + + builder.endObject(); + return builder; + } + + /** + * Convenience factory method. + * + * @param name The name of this suggestion. This is a required parameter. + */ + public static FuzzySuggestion fuzzySuggestion(String name) { + return new FuzzySuggestion(name); + } + + public static abstract class Suggestion implements ToXContent { + + private String name; + private String suggester; + private String text; + + public Suggestion(String name, String suggester) { + this.name = name; + this.suggester = suggester; + } + + /** + * Same as in {@link SuggestBuilder#setText(String)}, but in the suggestion scope. 
+ */ + public T setText(String text) { + this.text = text; + return (T) this; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(name); + if (suggester != null) { + builder.field("suggester", suggester); + } + if (text != null) { + builder.field("text", text); + } + builder = innerToXContent(builder, params); + builder.endObject(); + return builder; + } + + protected abstract XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException; + } + + /** + * Defines the actual suggest command. Each command uses the global options unless defined in the suggestion itself. + * All options are the same as the global options, but are only applicable for this suggestion. + */ + public static class FuzzySuggestion extends Suggestion { + + private String field; + private String analyzer; + private String suggestMode; + private Float accuracy; + private Integer size; + private String sort; + private String stringDistance; + private Boolean lowerCaseTerms; + private Integer maxEdits; + private Integer factor; + private Float maxTermFreq; + private Integer prefixLength; + private Integer minWordLength; + private Float minDocFreq; + private Integer shardSize; + + /** + * @param name The name of this suggestion. This is a required parameter. + */ + public FuzzySuggestion(String name) { + super(name, "fuzzy"); + } + + /** + * Sets from what field to fetch the candidate suggestions from. This is an required option and needs to be set + * via this setter or {@link org.elasticsearch.search.suggest.SuggestBuilder.FuzzySuggestion#setField(String)} method + */ + public FuzzySuggestion setField(String field) { + this.field = field; + return this; + } + + /** + * Sets the analyzer to analyse to suggest text with. Defaults to the search analyzer of the suggest field. 
+ */ + public FuzzySuggestion setAnalyzer(String analyzer) { + this.analyzer = analyzer; + return this; + } + + /** + * The global suggest mode controls what suggested terms are included or controls for what suggest text tokens, + * terms should be suggested for. Three possible values can be specified: + *

    + *
  1. missing - Only suggest terms in the suggest text that aren't in the index. This is the default. + *
  2. popular - Only suggest terms that occur in more docs than the original suggest text term. + *
  3. always - Suggest any matching suggest terms based on tokens in the suggest text. + *
+ */ + public FuzzySuggestion setSuggestMode(String suggestMode) { + this.suggestMode = suggestMode; + return this; + } + + /** + * Sets how similar the suggested terms at least need to be compared to the original suggest text tokens. + * A value between 0 and 1 can be specified. This value will be compared to the string distance result of each + * candidate spelling correction. + *

+ * Default is 0.5f. + */ + public FuzzySuggestion setAccuracy(float accuracy) { + this.accuracy = accuracy; + return this; + } + + /** + * Sets the maximum suggestions to be returned per suggest text term. + */ + public FuzzySuggestion setSize(int size) { + if (size <= 0) { + throw new ElasticSearchIllegalArgumentException("Size must be positive"); + } + + this.size = size; + return this; + } + + /** + * Sets how to sort the suggest terms per suggest text token. + * Two possible values: + *

    + *
  1. score - Sort should first be based on score, then document frequency and then the term itself. + *
  2. frequency - Sort should first be based on document frequency, then score and then the term itself. + *
+ *

+ * What the score is depends on the suggester being used. + */ + public FuzzySuggestion setSort(String sort) { + this.sort = sort; + return this; + } + + /** + * Sets what string distance implementation to use for comparing how similar suggested terms are. + * Four possible values can be specified: + *

    + *
  1. internal - This is the default and is based on damerau_levenshtein, but + * highly optimized for comparing string distance for terms inside the index. + *
  2. damerau_levenshtein - String distance algorithm based on Damerau-Levenshtein algorithm. + *
  3. levenstein - String distance algorithm based on Levenshtein edit distance algorithm. + *
  4. jarowinkler - String distance algorithm based on Jaro-Winkler algorithm. + *
  5. ngram - String distance algorithm based on n-grams. + *
+ */ + public FuzzySuggestion setStringDistance(String stringDistance) { + this.stringDistance = stringDistance; + return this; + } + + /** + * Sets whether to lowercase the suggest text tokens just before suggesting terms. + */ + public FuzzySuggestion setLowerCaseTerms(Boolean lowerCaseTerms) { + this.lowerCaseTerms = lowerCaseTerms; + return this; + } + + /** + * Sets the maximum edit distance candidate suggestions can have in order to be considered as a suggestion. + * Can only be a value between 1 and 2. Any other value result in an bad request error being thrown. Defaults to 2. + */ + public FuzzySuggestion setMaxEdits(Integer maxEdits) { + this.maxEdits = maxEdits; + return this; + } + + /** + * A factor that is used to multiply with the size in order to inspect more candidate suggestions. + * Can improve accuracy at the cost of performance. Defaults to 5. + */ + public FuzzySuggestion setFactor(Integer factor) { + this.factor = factor; + return this; + } + + /** + * Sets a maximum threshold in number of documents a suggest text token can exist in order to be corrected. + * Can be a relative percentage number (e.g 0.4) or an absolute number to represent document frequencies. + * If an value higher than 1 is specified then fractional can not be specified. Defaults to 0.01f. + *

+ * This can be used to exclude high frequency terms from being suggested. High frequency terms are usually + * spelled correctly; on top of this, this also improves the suggest performance. + */ + public FuzzySuggestion setMaxTermFreq(float maxTermFreq) { + this.maxTermFreq = maxTermFreq; + return this; + } + + /** + * Sets the number of minimal prefix characters that must match in order to be a candidate suggestion. + * Defaults to 1. Increasing this number improves suggest performance. Usually misspellings don't occur in the + * beginning of terms. + */ + public FuzzySuggestion setPrefixLength(int prefixLength) { + this.prefixLength = prefixLength; + return this; + } + + /** + * The minimum length a suggest text term must have in order to be corrected. Defaults to 4. + */ + public FuzzySuggestion setMinWordLength(int minWordLength) { + this.minWordLength = minWordLength; + return this; + } + + /** + * Sets a minimal threshold in number of documents a suggested term should appear in. This can be specified as + * an absolute number or as a relative percentage of number of documents. This can improve quality by only suggesting + * high frequency terms. Defaults to 0f and is not enabled. If a value higher than 1 is specified then the number + * cannot be fractional. + */ + public FuzzySuggestion setMinDocFreq(float minDocFreq) { + this.minDocFreq = minDocFreq; + return this; + } + + /** + * Sets the maximum number of suggested terms to be retrieved from each individual shard. During the reduce + * phase only the top N suggestions are returned based on the size option. Defaults to the + * size option. + *

+ * Setting this to a value higher than the `size` can be useful in order to get a more accurate document frequency + * for suggested terms. Due to the fact that terms are partitioned amongst shards, the shard level document + * frequencies of suggestions may not be precise. Increasing this will make these document frequencies + * more precise. + */ + public FuzzySuggestion setShardSize(Integer shardSize) { + this.shardSize = shardSize; + return this; + } + + @Override + public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException { + if (analyzer != null) { + builder.field("analyzer", analyzer); + } + if (field != null) { + builder.field("field", field); + } + if (suggestMode != null) { + builder.field("suggest_mode", suggestMode); + } + if (accuracy != null) { + builder.field("accuracy", accuracy); + } + if (size != null) { + builder.field("size", size); + } + if (sort != null) { + builder.field("sort", sort); + } + if (stringDistance != null) { + builder.field("string_distance", stringDistance); + } + if (lowerCaseTerms != null) { + builder.field("lowercase_terms", lowerCaseTerms); + } + if (maxEdits != null) { + builder.field("max_edits", maxEdits); + } + if (factor != null) { + builder.field("factor", factor); + } + if (maxTermFreq != null) { + builder.field("max_term_freq", maxTermFreq); + } + if (prefixLength != null) { + builder.field("prefix_length", prefixLength); + } + if (minWordLength != null) { + builder.field("min_word_len", minWordLength); + } + if (minDocFreq != null) { + builder.field("min_doc_freq", minDocFreq); + } + if (shardSize != null) { + builder.field("shard_size", shardSize); + } + return builder; + } + } + +} diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java b/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java new file mode 100644 index 00000000000..bfb60477b62 --- /dev/null +++ 
b/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java @@ -0,0 +1,235 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.suggest; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.spell.*; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.LevenshteinAutomata; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.search.SearchParseElement; +import org.elasticsearch.search.internal.SearchContext; + +/** + * + */ +public class SuggestParseElement implements SearchParseElement { + + @Override + public void parse(XContentParser parser, SearchContext context) throws Exception { + SuggestionSearchContext suggestionSearchContext = new SuggestionSearchContext(); + + BytesRef globalText = null; + + Analyzer defaultAnalyzer = context.mapperService().searchAnalyzer(); + float defaultAccuracy = SpellChecker.DEFAULT_ACCURACY; + int defaultSize = 5; + SuggestMode defaultSuggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; + Suggest.Suggestion.Sort defaultSort = Suggest.Suggestion.Sort.SCORE; + StringDistance 
defaultStringDistance = DirectSpellChecker.INTERNAL_LEVENSHTEIN; + boolean defaultLowerCaseTerms = false; // changed from Lucene default because we rely on search analyzer to properly handle it + int defaultMaxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE; + int defaultFactor = 5; + float defaultMaxTermFreq = 0.01f; + int defaultPrefixLength = 1; + int defaultMinQueryLength = 4; + float defaultMinDocFreq = 0f; + + String fieldName = null; + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } else if (token.isValue()) { + if ("text".equals(fieldName)) { + globalText = parser.bytes(); + } else { + throw new ElasticSearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]"); + } + } else if (token == XContentParser.Token.START_OBJECT) { + // TODO: Once we have more suggester impls we need to have different parsing logic per suggester. 
+ // This code is now specific for the fuzzy suggester + if ("suggestions".equals(fieldName)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_OBJECT) { + SuggestionSearchContext.Suggestion suggestion = new SuggestionSearchContext.Suggestion(); + suggestionSearchContext.addSuggestion(fieldName, suggestion); + + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fieldName = parser.currentName(); + } else if (token.isValue()) { + if ("suggester".equals(fieldName)) { + suggestion.suggester(parser.text()); + } else if ("analyzer".equals(fieldName)) { + String analyzerName = parser.text(); + Analyzer analyzer = context.mapperService().analysisService().analyzer(analyzerName); + if (analyzer == null) { + throw new ElasticSearchIllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists"); + } + suggestion.analyzer(analyzer); + } else if ("text".equals(fieldName)) { + suggestion.text(parser.bytes()); + } else if ("field".equals(fieldName)) { + suggestion.setField(parser.text()); + } else if ("accuracy".equals(fieldName)) { + suggestion.accuracy(parser.floatValue()); + } else if ("size".equals(fieldName)) { + suggestion.size(parser.intValue()); + } else if ("suggest_mode".equals(fieldName) || "suggestMode".equals(fieldName)) { + suggestion.suggestMode(resolveSuggestMode(parser.text())); + } else if ("sort".equals(fieldName)) { + suggestion.sort(resolveSort(parser.text())); + } else if ("string_distance".equals(fieldName) || "stringDistance".equals(fieldName)) { + suggestion.stringDistance(resolveDistance(parser.text())); + } else if ("lowercase_terms".equals(fieldName) || "lowercaseTerms".equals(fieldName)) { + suggestion.lowerCaseTerms(parser.booleanValue()); + } else if ("max_edits".equals(fieldName) || 
"maxEdits".equals(fieldName) || "fuzziness".equals(fieldName)) { + suggestion.maxEdits(parser.intValue()); + if (suggestion.maxEdits() < 1 || suggestion.maxEdits() > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { + throw new ElasticSearchIllegalArgumentException("Illegal max_edits value " + suggestion.maxEdits()); + } + } else if ("factor".equals(fieldName)) { + suggestion.factor(parser.intValue()); + } else if ("max_term_freq".equals(fieldName) || "maxTermFreq".equals(fieldName)) { + suggestion.maxTermFreq(parser.floatValue()); + } else if ("prefix_length".equals(fieldName) || "prefixLength".equals(fieldName)) { + suggestion.prefixLength(parser.intValue()); + } else if ("min_word_len".equals(fieldName) || "minWordLen".equals(fieldName)) { + suggestion.minQueryLength(parser.intValue()); + } else if ("min_doc_freq".equals(fieldName) || "minDocFreq".equals(fieldName)) { + suggestion.minDocFreq(parser.floatValue()); + } else if ("shard_size".equals(fieldName) || "shardSize".equals(fieldName)) { + suggestion.shardSize(parser.intValue()); + } else { + throw new ElasticSearchIllegalArgumentException("suggester[fuzzy] doesn't support [" + fieldName + "]"); + } + } + } + } + } + } + } + } + + // Verify options and set defaults + for (SuggestionSearchContext.Suggestion command : suggestionSearchContext.suggestions().values()) { + if (command.suggester() == null) { + throw new ElasticSearchIllegalArgumentException("The required suggester option is missing"); + } + if (command.field() == null) { + throw new ElasticSearchIllegalArgumentException("The required field option is missing"); + } + + if (command.text() == null) { + if (globalText == null) { + throw new ElasticSearchIllegalArgumentException("The required text option is missing"); + } + + command.text(globalText); + } + if (command.analyzer() == null) { + command.analyzer(defaultAnalyzer); + } + if (command.accuracy() == null) { + command.accuracy(defaultAccuracy); + } + if (command.size() == null) { + 
command.size(defaultSize); + } + if (command.suggestMode() == null) { + command.suggestMode(defaultSuggestMode); + } + if (command.sort() == null) { + command.sort(defaultSort); + } + if (command.stringDistance() == null) { + command.stringDistance(defaultStringDistance); + } + if (command.lowerCaseTerms() == null) { + command.lowerCaseTerms(defaultLowerCaseTerms); + } + if (command.maxEdits() == null) { + command.maxEdits(defaultMaxEdits); + } + if (command.factor() == null) { + command.factor(defaultFactor); + } + if (command.maxTermFreq() == null) { + command.maxTermFreq(defaultMaxTermFreq); + } + if (command.prefixLength() == null) { + command.prefixLength(defaultPrefixLength); + } + if (command.minWordLength() == null) { + command.minQueryLength(defaultMinQueryLength); + } + if (command.minDocFreq() == null) { + command.minDocFreq(defaultMinDocFreq); + } + if (command.shardSize() == null) { + command.shardSize(defaultSize); + } + } + context.suggest(suggestionSearchContext); + } + + private SuggestMode resolveSuggestMode(String sortVal) { + if ("missing".equals(sortVal)) { + return SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; + } else if ("popular".equals(sortVal)) { + return SuggestMode.SUGGEST_MORE_POPULAR; + } else if ("always".equals(sortVal)) { + return SuggestMode.SUGGEST_ALWAYS; + } else { + throw new ElasticSearchIllegalArgumentException("Illegal suggest mode " + sortVal); + } + } + + private Suggest.Suggestion.Sort resolveSort(String sortVal) { + if ("score".equals(sortVal)) { + return Suggest.Suggestion.Sort.SCORE; + } else if ("frequency".equals(sortVal)) { + return Suggest.Suggestion.Sort.FREQUENCY; + } else { + throw new ElasticSearchIllegalArgumentException("Illegal suggest sort " + sortVal); + } + } + + private StringDistance resolveDistance(String distanceVal) { + if ("internal".equals(distanceVal)) { + return DirectSpellChecker.INTERNAL_LEVENSHTEIN; + } else if ("damerau_levenshtein".equals(distanceVal)) { + return new LuceneLevenshteinDistance(); + 
} else if ("levenstein".equals(distanceVal)) { + return new LevensteinDistance(); + } else if ("jarowinkler".equals(distanceVal)) { + return new JaroWinklerDistance(); + } else if ("ngram".equals(distanceVal)) { + return new NGramDistance(); + } else { + throw new ElasticSearchIllegalArgumentException("Illegal distance option " + distanceVal); + } + } + +} diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestPhase.java b/src/main/java/org/elasticsearch/search/suggest/SuggestPhase.java new file mode 100644 index 00000000000..811c01eca23 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/suggest/SuggestPhase.java @@ -0,0 +1,231 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.suggest; + +import com.google.common.collect.ImmutableMap; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.spell.DirectSpellChecker; +import org.apache.lucene.search.spell.SuggestWord; +import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; +import org.apache.lucene.search.spell.SuggestWordQueue; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.UnicodeUtil; +import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.io.FastCharArrayReader; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.text.BytesText; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; +import org.elasticsearch.search.SearchParseElement; +import org.elasticsearch.search.SearchPhase; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.search.suggest.Suggest.Suggestion; + +/** + */ +public class SuggestPhase extends AbstractComponent implements SearchPhase { + + @Inject + public SuggestPhase(Settings settings) { + super(settings); + } + + @Override + public Map parseElements() { + ImmutableMap.Builder parseElements = ImmutableMap.builder(); + parseElements.put("suggest", new SuggestParseElement()); + return parseElements.build(); + 
} + + @Override + public void preProcess(SearchContext context) { + } + + @Override + public void execute(SearchContext context) throws ElasticSearchException { + SuggestionSearchContext suggest = context.suggest(); + if (suggest == null) { + return; + } + + try { + CharsRef spare = new CharsRef(); // Maybe add CharsRef to CacheRecycler? + List suggestions = new ArrayList(2); + for (Map.Entry entry : suggest.suggestions().entrySet()) { + SuggestionSearchContext.Suggestion suggestion = entry.getValue(); + if ("fuzzy".equals(suggestion.suggester())) { + suggestions.add(executeDirectSpellChecker(entry.getKey(), suggestion, context, spare)); + } else { + throw new ElasticSearchIllegalArgumentException("Unsupported suggester[" + suggestion.suggester() + "]"); + } + } + context.queryResult().suggest(new Suggest(suggestions)); + } catch (IOException e) { + throw new ElasticSearchException("I/O exception during suggest phase", e); + } + } + + private Suggestion executeDirectSpellChecker(String name, SuggestionSearchContext.Suggestion suggestion, SearchContext context, CharsRef spare) throws IOException { + DirectSpellChecker directSpellChecker = new DirectSpellChecker(); + directSpellChecker.setAccuracy(suggestion.accuracy()); + Comparator comparator; + switch (suggestion.sort()) { + case SCORE: + comparator = SuggestWordQueue.DEFAULT_COMPARATOR; + break; + case FREQUENCY: + comparator = LUCENE_FREQUENCY; + break; + default: + throw new ElasticSearchIllegalArgumentException("Illegal suggest sort: " + suggestion.sort()); + } + directSpellChecker.setComparator(comparator); + directSpellChecker.setDistance(suggestion.stringDistance()); + directSpellChecker.setLowerCaseTerms(suggestion.lowerCaseTerms()); + directSpellChecker.setMaxEdits(suggestion.maxEdits()); + directSpellChecker.setMaxInspections(suggestion.factor()); + directSpellChecker.setMaxQueryFrequency(suggestion.maxTermFreq()); + directSpellChecker.setMinPrefix(suggestion.prefixLength()); + 
directSpellChecker.setMinQueryLength(suggestion.minWordLength()); + directSpellChecker.setThresholdFrequency(suggestion.minDocFreq()); + + Suggestion response = new Suggestion( + name, suggestion.size(), suggestion.sort() + ); + List tokens = queryTerms(suggestion, spare); + for (Token token : tokens) { + IndexReader indexReader = context.searcher().getIndexReader(); + // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef + SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar( + token.term, suggestion.shardSize(), indexReader, suggestion.suggestMode() + ); + Text key = new BytesText(new BytesArray(token.term.bytes())); + Suggestion.Term resultTerm = new Suggestion.Term(key, token.startOffset, token.endOffset); + for (SuggestWord suggestWord : suggestedWords) { + Text word = new StringText(suggestWord.string); + resultTerm.addSuggested(new Suggestion.Term.SuggestedTerm(word, suggestWord.freq, suggestWord.score)); + } + response.addTerm(resultTerm); + } + return response; + } + + private List queryTerms(SuggestionSearchContext.Suggestion suggestion, CharsRef spare) throws IOException { + UnicodeUtil.UTF8toUTF16(suggestion.text(), spare); + TokenStream ts = suggestion.analyzer().tokenStream( + suggestion.field(), new FastCharArrayReader(spare.chars, spare.offset, spare.length) + ); + ts.reset(); + + TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + BytesRef termRef = termAtt.getBytesRef(); + + List result = new ArrayList(5); + while (ts.incrementToken()) { + termAtt.fillBytesRef(); + Term term = new Term(suggestion.field(), BytesRef.deepCopyOf(termRef)); + result.add(new Token(term, offsetAtt.startOffset(), offsetAtt.endOffset())); + } + return result; + } + + private static Comparator LUCENE_FREQUENCY = new SuggestWordFrequencyComparator(); + public static Comparator SCORE = new Score(); + public static Comparator 
FREQUENCY = new Frequency(); + + // Same behaviour as comparators in suggest module, but for SuggestedWord + // Highest score first, then highest freq first, then lowest term first + public static class Score implements Comparator { + + @Override + public int compare(Suggestion.Term.SuggestedTerm first, Suggestion.Term.SuggestedTerm second) { + // first criteria: the distance + int cmp = Float.compare(second.getScore(), first.getScore()); + if (cmp != 0) { + return cmp; + } + + // second criteria (if first criteria is equal): the popularity + cmp = second.getFrequency() - first.getFrequency(); + if (cmp != 0) { + return cmp; + } + // third criteria: term text + return first.getTerm().compareTo(second.getTerm()); + } + + } + + // Same behaviour as comparators in suggest module, but for SuggestedWord + // Highest freq first, then highest score first, then lowest term first + public static class Frequency implements Comparator { + + @Override + public int compare(Suggestion.Term.SuggestedTerm first, Suggestion.Term.SuggestedTerm second) { + // first criteria: the popularity + int cmp = second.getFrequency() - first.getFrequency(); + if (cmp != 0) { + return cmp; + } + + // second criteria (if first criteria is equal): the distance + cmp = Float.compare(second.getScore(), first.getScore()); + if (cmp != 0) { + return cmp; + } + + // third criteria: term text + return first.getTerm().compareTo(second.getTerm()); + } + + } + + private static class Token { + + public final Term term; + public final int startOffset; + public final int endOffset; + + private Token(Term term, int startOffset, int endOffset) { + this.term = term; + this.startOffset = startOffset; + this.endOffset = endOffset; + } + + } + +} diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java b/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java new file mode 100644 index 00000000000..54952ed76d9 --- /dev/null +++ 
b/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java @@ -0,0 +1,206 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.suggest; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.spell.StringDistance; +import org.apache.lucene.search.spell.SuggestMode; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.ElasticSearchIllegalArgumentException; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + */ +public class SuggestionSearchContext { + + private final Map suggestions = new LinkedHashMap(4); + + public void addSuggestion(String name, Suggestion suggestion) { + suggestions.put(name, suggestion); + } + + public Map suggestions() { + return suggestions; + } + + public static class Suggestion { + + private String suggester; + private BytesRef text; + private String field; + private Analyzer analyzer; + private SuggestMode suggestMode; + private Float accuracy; + private Integer size; + private Suggest.Suggestion.Sort sort; + private StringDistance stringDistance; + private Boolean lowerCaseTerms; + private Integer maxEdits; + private Integer factor; + private Float maxTermFreq; + private Integer 
prefixLength; + private Integer minWordLength; + private Float minDocFreq; + private Integer shardSize; + + public String suggester() { + return suggester; + } + + public void suggester(String suggester) { + this.suggester = suggester; + } + + public BytesRef text() { + return text; + } + + public void text(BytesRef text) { + this.text = text; + } + + public Analyzer analyzer() { + return analyzer; + } + + public void analyzer(Analyzer analyzer) { + this.analyzer = analyzer; + } + + public String field() { + return field; + } + + public void setField(String field) { + this.field = field; + } + + public SuggestMode suggestMode() { + return suggestMode; + } + + public void suggestMode(SuggestMode suggestMode) { + this.suggestMode = suggestMode; + } + + public Float accuracy() { + return accuracy; + } + + public void accuracy(float accuracy) { + this.accuracy = accuracy; + } + + public Integer size() { + return size; + } + + public void size(int size) { + if (size <= 0) { + throw new ElasticSearchIllegalArgumentException("Size must be positive"); + } + + this.size = size; + } + + public Suggest.Suggestion.Sort sort() { + return sort; + } + + public void sort(Suggest.Suggestion.Sort sort) { + this.sort = sort; + } + + public StringDistance stringDistance() { + return stringDistance; + } + + public void stringDistance(StringDistance distance) { + this.stringDistance = distance; + } + + public Boolean lowerCaseTerms() { + return lowerCaseTerms; + } + + public void lowerCaseTerms(boolean lowerCaseTerms) { + this.lowerCaseTerms = lowerCaseTerms; + } + + public Integer maxEdits() { + return maxEdits; + } + + public void maxEdits(int maxEdits) { + this.maxEdits = maxEdits; + } + + public Integer factor() { + return factor; + } + + public void factor(int factor) { + this.factor = factor; + } + + public Float maxTermFreq() { + return maxTermFreq; + } + + public void maxTermFreq(float maxTermFreq) { + this.maxTermFreq = maxTermFreq; + } + + public Integer prefixLength() { + 
return prefixLength; + } + + public void prefixLength(int prefixLength) { + this.prefixLength = prefixLength; + } + + public Integer minWordLength() { + return minWordLength; + } + + public void minQueryLength(int minQueryLength) { + this.minWordLength = minQueryLength; + } + + public Float minDocFreq() { + return minDocFreq; + } + + public void minDocFreq(float minDocFreq) { + this.minDocFreq = minDocFreq; + } + + public Integer shardSize() { + return shardSize; + } + + public void shardSize(Integer shardSize) { + this.shardSize = shardSize; + } + } + +} diff --git a/src/test/java/org/elasticsearch/benchmark/search/SuggestSearchBenchMark.java b/src/test/java/org/elasticsearch/benchmark/search/SuggestSearchBenchMark.java new file mode 100644 index 00000000000..799b2fccb73 --- /dev/null +++ b/src/test/java/org/elasticsearch/benchmark/search/SuggestSearchBenchMark.java @@ -0,0 +1,166 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.benchmark.search; + +import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.client.Requests; +import org.elasticsearch.common.StopWatch; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.SizeValue; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.node.Node; +import org.elasticsearch.search.suggest.Suggest; +import org.elasticsearch.search.suggest.SuggestBuilder; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; +import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; +import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.index.query.QueryBuilders.*; +import static org.elasticsearch.node.NodeBuilder.nodeBuilder; + +/** + */ +public class SuggestSearchBenchMark { + + public static void main(String[] args) throws Exception { + int SEARCH_ITERS = 200; + + Settings settings = settingsBuilder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .build(); + + Node[] nodes = new Node[1]; + for (int i = 0; i < nodes.length; i++) { + nodes[i] = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "node" + i)).node(); + } + + Client client = nodes[0].client(); + try { + client.admin().indices().prepareCreate("test").setSettings(settings).addMapping("type1", XContentFactory.jsonBuilder().startObject().startObject("type1") + 
.startObject("_source").field("enabled", false).endObject() + .startObject("_all").field("enabled", false).endObject() + .startObject("_type").field("index", "no").endObject() + .startObject("_id").field("index", "no").endObject() + .startObject("properties") + .startObject("field").field("type", "string").field("index", "not_analyzed").field("omit_norms", true).endObject() + .endObject() + .endObject().endObject()).execute().actionGet(); + ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet(); + if (clusterHealthResponse.timedOut()) { + System.err.println("--> Timed out waiting for cluster health"); + } + + StopWatch stopWatch = new StopWatch().start(); + long COUNT = SizeValue.parseSizeValue("10m").singles(); + int BATCH = 100; + System.out.println("Indexing [" + COUNT + "] ..."); + long ITERS = COUNT / BATCH; + long i = 1; + char character = 'a'; + int idCounter = 0; + for (; i <= ITERS; i++) { + int termCounter = 0; + BulkRequestBuilder request = client.prepareBulk(); + for (int j = 0; j < BATCH; j++) { + request.add(Requests.indexRequest("test").type("type1").id(Integer.toString(idCounter++)).source(source("prefix" + character + termCounter++))); + } + character++; + BulkResponse response = request.execute().actionGet(); + if (response.hasFailures()) { + System.err.println("failures..."); + } + } + System.out.println("Indexing took " + stopWatch.totalTime()); + + client.admin().indices().prepareRefresh().execute().actionGet(); + System.out.println("Count: " + client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().count()); + } catch (Exception e) { + System.out.println("--> Index already exists, ignoring indexing phase, waiting for green"); + ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet(); + if (clusterHealthResponse.timedOut()) { + System.err.println("--> 
Timed out waiting for cluster health"); + } + client.admin().indices().prepareRefresh().execute().actionGet(); + System.out.println("Count: " + client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().count()); + } + + + System.out.println("Warming up..."); + char startChar = 'a'; + for (int i = 0; i <= 20; i++) { + String term = "prefix" + startChar; + SearchResponse response = client.prepareSearch() + .setQuery(prefixQuery("field", term)) + .addSuggestion(new SuggestBuilder.FuzzySuggestion("field").setField("field").setText(term).setSuggestMode("always")) + .execute().actionGet(); + if (response.hits().totalHits() == 0) { + System.err.println("No hits"); + continue; + } + startChar++; + } + + + System.out.println("Starting benchmarking suggestions."); + startChar = 'a'; + long timeTaken = 0; + for (int i = 0; i <= SEARCH_ITERS; i++) { + String term = "prefix" + startChar; + SearchResponse response = client.prepareSearch() + .setQuery(matchQuery("field", term)) + .addSuggestion(new SuggestBuilder.FuzzySuggestion("field").setText(term).setField("field").setSuggestMode("always")) + .execute().actionGet(); + timeTaken += response.tookInMillis(); + if (response.suggest() == null) { + System.err.println("No suggestions"); + continue; + } + List suggestedTerms = response.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested(); + if (suggestedTerms == null || suggestedTerms.isEmpty()) { + System.err.println("No suggestions"); + } + startChar++; + } + + System.out.println("Avg time taken without filter " + (timeTaken / SEARCH_ITERS)); + + client.close(); + for (Node node : nodes) { + node.close(); + } + } + + private static XContentBuilder source(String nameValue) throws IOException { + return jsonBuilder().startObject() + .field("field", nameValue) + .endObject(); + } + +} diff --git a/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java 
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.test.integration.search.suggest;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.search.suggest.SuggestBuilder.fuzzySuggestion;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;

/**
 * Integration tests for the fuzzy (term) suggest feature: suggestions attached
 * to a regular query, suggest-only requests, multiple named suggestions per
 * request, and the size/sort/max-edits/accuracy options.
 */
public class SuggestSearchTests extends AbstractNodesTests {

    private Client client;

    @BeforeClass
    public void createNodes() throws Exception {
        startNode("server1");
        startNode("server2");
        client = getClient();
    }

    @AfterClass
    public void closeNodes() {
        client.close();
        closeAllNodes();
    }

    protected Client getClient() {
        return client("server1");
    }

    /**
     * Drops the "test" index (if present) and creates a fresh, empty one so
     * every test starts from a known state.
     */
    private void recreateTestIndex() {
        try {
            client.admin().indices().prepareDelete("test").execute().actionGet();
        } catch (Exception e) {
            // The index may not exist yet on the first run; that is fine.
        }
        client.admin().indices().prepareCreate("test").execute().actionGet();
    }

    /**
     * Indexes a single document with one field into the "test" index.
     */
    private void indexDoc(String field, String value) throws Exception {
        client.prepareIndex("test", "type1")
                .setSource(XContentFactory.jsonBuilder()
                        .startObject()
                        .field(field, value)
                        .endObject())
                .execute().actionGet();
    }

    @Test
    public void testSimple() throws Exception {
        recreateTestIndex();

        indexDoc("text", "abcd");
        indexDoc("text", "aacd");
        indexDoc("text", "abbd");
        indexDoc("text", "abcc");
        client.admin().indices().prepareRefresh().execute().actionGet();

        SearchResponse search = client.prepareSearch()
                .setQuery(matchQuery("text", "spellcecker"))
                .addSuggestion(
                        fuzzySuggestion("test").setSuggestMode("always") // Always, otherwise the results can vary between requests.
                                .setText("abcd")
                                .setField("text"))
                .execute().actionGet();

        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
        assertThat(search.suggest(), notNullValue());
        assertThat(search.suggest().getSuggestions().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("test"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getTerm(), equalTo("abcd"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(3));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("aacd"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("abbd"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("abcc"));

        // Suggest-only request (no query). The response MUST be re-assigned,
        // otherwise the assertions below would silently re-check the previous
        // response instead of this one.
        search = client.prepareSearch()
                .addSuggestion(
                        fuzzySuggestion("test").setSuggestMode("always") // Always, otherwise the results can vary between requests.
                                .setText("abcd")
                                .setField("text"))
                .execute().actionGet();

        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
        assertThat(search.suggest(), notNullValue());
        assertThat(search.suggest().getSuggestions().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("test"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(3));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("aacd"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("abbd"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("abcc"));
    }

    @Test
    public void testEmpty() throws Exception {
        recreateTestIndex();

        SearchResponse search = client.prepareSearch()
                .setQuery(matchQuery("text", "spellcecker"))
                .addSuggestion(
                        fuzzySuggestion("test").setSuggestMode("always") // Always, otherwise the results can vary between requests.
                                .setText("abcd")
                                .setField("text"))
                .execute().actionGet();

        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
        assertThat(search.suggest(), notNullValue());
        assertThat(search.suggest().getSuggestions().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("test"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getTerm(), equalTo("abcd"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(0));

        // Suggest-only request against the empty index; re-assign the response
        // so the assertions actually verify this second request.
        search = client.prepareSearch()
                .addSuggestion(
                        fuzzySuggestion("test").setSuggestMode("always") // Always, otherwise the results can vary between requests.
                                .setText("abcd")
                                .setField("text"))
                .execute().actionGet();

        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
        assertThat(search.suggest(), notNullValue());
        assertThat(search.suggest().getSuggestions().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("test"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(0));
    }

    @Test
    public void testWithMultipleCommands() throws Exception {
        recreateTestIndex();

        client.prepareIndex("test", "type1")
                .setSource(XContentFactory.jsonBuilder()
                        .startObject()
                        .field("field1", "prefix_abcd")
                        .field("field2", "prefix_efgh")
                        .endObject())
                .execute().actionGet();
        client.prepareIndex("test", "type1")
                .setSource(XContentFactory.jsonBuilder()
                        .startObject()
                        .field("field1", "prefix_aacd")
                        .field("field2", "prefix_eeeh")
                        .endObject())
                .execute().actionGet();
        client.prepareIndex("test", "type1")
                .setSource(XContentFactory.jsonBuilder()
                        .startObject()
                        .field("field1", "prefix_abbd")
                        .field("field2", "prefix_efff")
                        .endObject())
                .execute().actionGet();
        client.prepareIndex("test", "type1")
                .setSource(XContentFactory.jsonBuilder()
                        .startObject()
                        .field("field1", "prefix_abcc")
                        .field("field2", "prefix_eggg")
                        .endObject())
                .execute().actionGet();
        client.admin().indices().prepareRefresh().execute().actionGet();

        // Three independent named suggestions in one request.
        SearchResponse search = client.prepareSearch()
                .addSuggestion(fuzzySuggestion("size1")
                        .setSize(1).setText("prefix_abcd").setMaxTermFreq(10).setMinDocFreq(0)
                        .setField("field1").setSuggestMode("always"))
                .addSuggestion(fuzzySuggestion("field2")
                        .setField("field2").setText("prefix_eeeh prefix_efgh")
                        .setMaxTermFreq(10).setMinDocFreq(0).setSuggestMode("always"))
                .addSuggestion(fuzzySuggestion("accuracy")
                        .setField("field2").setText("prefix_efgh").setAccuracy(1f)
                        .setMaxTermFreq(10).setMinDocFreq(0).setSuggestMode("always"))
                .execute().actionGet();

        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
        assertThat(search.suggest(), notNullValue());
        assertThat(search.suggest().getSuggestions().size(), equalTo(3));
        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("size1"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("prefix_aacd"));
        assertThat(search.suggest().getSuggestions().get(1).getName(), equalTo("field2"));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().size(), equalTo(2));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(0).getSuggested().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(1).getSuggested().size(), equalTo(3));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(1).getSuggested().get(0).getTerm().string(), equalTo("prefix_eeeh"));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(1).getSuggested().get(1).getTerm().string(), equalTo("prefix_efff"));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(1).getSuggested().get(2).getTerm().string(), equalTo("prefix_eggg"));
        assertThat(search.suggest().getSuggestions().get(2).getName(), equalTo("accuracy"));
        // Accuracy of 1.0 means only exact matches qualify, so nothing is suggested.
        assertThat(search.suggest().getSuggestions().get(2).getTerms().get(0).getSuggested().isEmpty(), equalTo(true));
    }

    @Test
    public void testSizeAndSort() throws Exception {
        recreateTestIndex();

        // Term -> number of documents to index with that term. The doc counts
        // drive the "frequency" sort; the three 1-count terms are the closest
        // matches by score.
        Map<String, Integer> termsAndDocCount = new HashMap<String, Integer>();
        termsAndDocCount.put("prefix_aaad", 20);
        termsAndDocCount.put("prefix_abbb", 18);
        termsAndDocCount.put("prefix_aaca", 16);
        termsAndDocCount.put("prefix_abba", 14);
        termsAndDocCount.put("prefix_accc", 12);
        termsAndDocCount.put("prefix_addd", 10);
        termsAndDocCount.put("prefix_abaa", 8);
        termsAndDocCount.put("prefix_dbca", 6);
        termsAndDocCount.put("prefix_cbad", 4);

        termsAndDocCount.put("prefix_aacd", 1);
        termsAndDocCount.put("prefix_abcc", 1);
        termsAndDocCount.put("prefix_accd", 1);

        for (Map.Entry<String, Integer> entry : termsAndDocCount.entrySet()) {
            for (int i = 0; i < entry.getValue(); i++) {
                indexDoc("field1", entry.getKey());
            }
        }
        client.admin().indices().prepareRefresh().execute().actionGet();

        SearchResponse search = client.prepareSearch()
                .setSuggestText("prefix_abcd") // global suggest text shared by all commands below
                .addSuggestion(fuzzySuggestion("size3SortScoreFirst")
                        .setSize(3).setMinDocFreq(0).setField("field1").setSuggestMode("always"))
                .addSuggestion(fuzzySuggestion("size10SortScoreFirst")
                        .setSize(10).setMinDocFreq(0).setField("field1").setSuggestMode("always"))
                .addSuggestion(fuzzySuggestion("size3SortScoreFirstMaxEdits1")
                        .setMaxEdits(1)
                        .setSize(10).setMinDocFreq(0).setField("field1").setSuggestMode("always"))
                .addSuggestion(fuzzySuggestion("size10SortFrequencyFirst")
                        .setSize(10).setSort("frequency").setShardSize(1000)
                        .setMinDocFreq(0).setField("field1").setSuggestMode("always"))
                .execute().actionGet();

        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
        assertThat(search.suggest(), notNullValue());
        assertThat(search.suggest().getSuggestions().size(), equalTo(4));
        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("size3SortScoreFirst"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(3));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("prefix_aacd"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("prefix_abcc"));
        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("prefix_accd"));

        assertThat(search.suggest().getSuggestions().get(1).getName(), equalTo("size10SortScoreFirst"));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(0).getSuggested().size(), equalTo(10));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("prefix_aacd"));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("prefix_abcc"));
        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("prefix_accd"));
        // NOTE: asserting entries past index 2 here is flaky. The suggested
        // corrections carry shard-level document frequencies, which do not
        // necessarily match index-level frequencies depending on sharding.

        assertThat(search.suggest().getSuggestions().get(2).getName(), equalTo("size3SortScoreFirstMaxEdits1"));
        assertThat(search.suggest().getSuggestions().get(2).getTerms().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(2).getTerms().get(0).getSuggested().size(), equalTo(3));
        assertThat(search.suggest().getSuggestions().get(2).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("prefix_aacd"));
        assertThat(search.suggest().getSuggestions().get(2).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("prefix_abcc"));
        assertThat(search.suggest().getSuggestions().get(2).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("prefix_accd"));

        assertThat(search.suggest().getSuggestions().get(3).getName(), equalTo("size10SortFrequencyFirst"));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().size(), equalTo(1));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().size(), equalTo(10));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("prefix_aaad"));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("prefix_abbb"));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("prefix_aaca"));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(3).getTerm().string(), equalTo("prefix_abba"));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(4).getTerm().string(), equalTo("prefix_accc"));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(5).getTerm().string(), equalTo("prefix_addd"));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(6).getTerm().string(), equalTo("prefix_abaa"));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(7).getTerm().string(), equalTo("prefix_dbca"));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(8).getTerm().string(), equalTo("prefix_cbad"));
        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(9).getTerm().string(), equalTo("prefix_aacd"));
        // The tenth slot could also plausibly be prefix_abcc or prefix_accd
        // (all three have doc count 1); see the shard-level frequency note above.
    }
}