diff --git a/pom.xml b/pom.xml
index 5b58e3e84cf..1d62de4e541 100644
--- a/pom.xml
+++ b/pom.xml
@@ -91,6 +91,12 @@
${lucene.version}
compile
+    <dependency>
+        <groupId>org.apache.lucene</groupId>
+        <artifactId>lucene-suggest</artifactId>
+        <version>${lucene.version}</version>
+        <scope>compile</scope>
+    </dependency>
diff --git a/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java b/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java
index 6673f5e811c..0003ae465e1 100644
--- a/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java
+++ b/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java
@@ -37,6 +37,7 @@ import org.elasticsearch.search.facet.AbstractFacetBuilder;
import org.elasticsearch.search.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.SortBuilder;
import org.elasticsearch.search.sort.SortOrder;
+import org.elasticsearch.search.suggest.SuggestBuilder;
import java.util.Map;
@@ -646,6 +647,22 @@ public class SearchRequestBuilder extends ActionRequestBuilder indexBoost = null;
private String[] stats;
@@ -400,6 +403,13 @@ public class SearchSourceBuilder implements ToXContent {
return this;
}
+ public SuggestBuilder suggest() {
+ if (suggestBuilder == null) {
+ suggestBuilder = new SuggestBuilder();
+ }
+ return suggestBuilder;
+ }
+
/**
* Sets no fields to be loaded, resulting in only id and type to be returned per field.
*/
@@ -709,6 +719,10 @@ public class SearchSourceBuilder implements ToXContent {
highlightBuilder.toXContent(builder, params);
}
+ if (suggestBuilder != null) {
+ suggestBuilder.toXContent(builder, params);
+ }
+
if (stats != null) {
builder.startArray("stats");
for (String stat : stats) {
diff --git a/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java b/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java
index 2a38084e214..6363e7f413b 100644
--- a/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java
+++ b/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java
@@ -48,6 +48,7 @@ import org.elasticsearch.search.internal.InternalSearchHits;
import org.elasticsearch.search.internal.InternalSearchResponse;
import org.elasticsearch.search.query.QuerySearchResult;
import org.elasticsearch.search.query.QuerySearchResultProvider;
+import org.elasticsearch.search.suggest.Suggest;
import java.util.ArrayList;
import java.util.Collection;
@@ -373,7 +374,38 @@ public class SearchPhaseController extends AbstractComponent {
}
}
+ // merge suggest results
+ Suggest suggest = null;
+ if (!queryResults.isEmpty()) {
+ List mergedSuggestions = null;
+ for (QuerySearchResultProvider resultProvider : queryResults.values()) {
+ Suggest shardResult = resultProvider.queryResult().suggest();
+ if (shardResult == null) {
+ continue;
+ }
+
+ if (mergedSuggestions == null) {
+ mergedSuggestions = shardResult.getSuggestions();
+ continue;
+ }
+
+ for (Suggest.Suggestion shardCommand : shardResult.getSuggestions()) {
+ for (Suggest.Suggestion mergedSuggestion : mergedSuggestions) {
+ if (mergedSuggestion.getName().equals(shardCommand.getName())) {
+ mergedSuggestion.reduce(shardCommand);
+ }
+ }
+ }
+ }
+ if (mergedSuggestions != null) {
+ suggest = new Suggest(mergedSuggestions);
+ for (Suggest.Suggestion suggestion : mergedSuggestions) {
+ suggestion.trim();
+ }
+ }
+ }
+
InternalSearchHits searchHits = new InternalSearchHits(hits.toArray(new InternalSearchHit[hits.size()]), totalHits, maxScore);
- return new InternalSearchResponse(searchHits, facets, timedOut);
+ return new InternalSearchResponse(searchHits, facets, suggest, timedOut);
}
}
diff --git a/src/main/java/org/elasticsearch/search/internal/InternalSearchResponse.java b/src/main/java/org/elasticsearch/search/internal/InternalSearchResponse.java
index 75c705a0e56..e3ffc1fbe0c 100644
--- a/src/main/java/org/elasticsearch/search/internal/InternalSearchResponse.java
+++ b/src/main/java/org/elasticsearch/search/internal/InternalSearchResponse.java
@@ -27,6 +27,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.facet.Facets;
import org.elasticsearch.search.facet.InternalFacets;
+import org.elasticsearch.search.suggest.Suggest;
import java.io.IOException;
@@ -41,16 +42,19 @@ public class InternalSearchResponse implements Streamable, ToXContent {
private InternalFacets facets;
+ private Suggest suggest;
+
private boolean timedOut;
- public static final InternalSearchResponse EMPTY = new InternalSearchResponse(new InternalSearchHits(new InternalSearchHit[0], 0, 0), null, false);
+ public static final InternalSearchResponse EMPTY = new InternalSearchResponse(new InternalSearchHits(new InternalSearchHit[0], 0, 0), null, null, false);
private InternalSearchResponse() {
}
- public InternalSearchResponse(InternalSearchHits hits, InternalFacets facets, boolean timedOut) {
+ public InternalSearchResponse(InternalSearchHits hits, InternalFacets facets, Suggest suggest, boolean timedOut) {
this.hits = hits;
this.facets = facets;
+ this.suggest = suggest;
this.timedOut = timedOut;
}
@@ -66,12 +70,19 @@ public class InternalSearchResponse implements Streamable, ToXContent {
return facets;
}
+ public Suggest suggest() {
+ return suggest;
+ }
+
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
hits.toXContent(builder, params);
if (facets != null) {
facets.toXContent(builder, params);
}
+ if (suggest != null) {
+ suggest.toXContent(builder, params);
+ }
return builder;
}
@@ -87,6 +98,9 @@ public class InternalSearchResponse implements Streamable, ToXContent {
if (in.readBoolean()) {
facets = InternalFacets.readFacets(in);
}
+ if (in.readBoolean()) {
+ suggest = Suggest.readSuggest(in);
+ }
timedOut = in.readBoolean();
}
@@ -99,6 +113,12 @@ public class InternalSearchResponse implements Streamable, ToXContent {
out.writeBoolean(true);
facets.writeTo(out);
}
+ if (suggest == null) {
+ out.writeBoolean(false);
+ } else {
+ out.writeBoolean(true);
+ suggest.writeTo(out);
+ }
out.writeBoolean(timedOut);
}
}
diff --git a/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/src/main/java/org/elasticsearch/search/internal/SearchContext.java
index e535b8ede33..3f6d88fb7ab 100644
--- a/src/main/java/org/elasticsearch/search/internal/SearchContext.java
+++ b/src/main/java/org/elasticsearch/search/internal/SearchContext.java
@@ -60,6 +60,7 @@ import org.elasticsearch.search.highlight.SearchContextHighlight;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.query.QuerySearchResult;
import org.elasticsearch.search.scan.ScanContext;
+import org.elasticsearch.search.suggest.SuggestionSearchContext;
import java.util.ArrayList;
import java.util.HashMap;
@@ -160,6 +161,8 @@ public class SearchContext implements Releasable {
private SearchContextHighlight highlight;
+ private SuggestionSearchContext suggest;
+
private SearchLookup searchLookup;
private boolean queryRewritten;
@@ -301,6 +304,14 @@ public class SearchContext implements Releasable {
this.highlight = highlight;
}
+ public SuggestionSearchContext suggest() {
+ return suggest;
+ }
+
+ public void suggest(SuggestionSearchContext suggest) {
+ this.suggest = suggest;
+ }
+
public boolean hasScriptFields() {
return scriptFields != null;
}
diff --git a/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/src/main/java/org/elasticsearch/search/query/QueryPhase.java
index 1ff009b4882..61068ec8083 100644
--- a/src/main/java/org/elasticsearch/search/query/QueryPhase.java
+++ b/src/main/java/org/elasticsearch/search/query/QueryPhase.java
@@ -35,6 +35,7 @@ import org.elasticsearch.search.internal.ScopePhase;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.sort.SortParseElement;
import org.elasticsearch.search.sort.TrackScoresParseElement;
+import org.elasticsearch.search.suggest.SuggestPhase;
import java.util.Map;
@@ -44,10 +45,12 @@ import java.util.Map;
public class QueryPhase implements SearchPhase {
private final FacetPhase facetPhase;
+ private final SuggestPhase suggestPhase;
@Inject
- public QueryPhase(FacetPhase facetPhase) {
+ public QueryPhase(FacetPhase facetPhase, SuggestPhase suggestPhase) {
this.facetPhase = facetPhase;
+ this.suggestPhase = suggestPhase;
}
@Override
@@ -68,7 +71,8 @@ public class QueryPhase implements SearchPhase {
.put("min_score", new MinScoreParseElement())
.put("minScore", new MinScoreParseElement())
.put("timeout", new TimeoutParseElement())
- .putAll(facetPhase.parseElements());
+ .putAll(facetPhase.parseElements())
+ .putAll(suggestPhase.parseElements());
return parseElements.build();
}
@@ -185,6 +189,7 @@ public class QueryPhase implements SearchPhase {
searchContext.searcher().processedScope();
}
+ suggestPhase.execute(searchContext);
facetPhase.execute(searchContext);
}
}
diff --git a/src/main/java/org/elasticsearch/search/query/QuerySearchResult.java b/src/main/java/org/elasticsearch/search/query/QuerySearchResult.java
index 6ed9ae81fa5..decf5dca374 100644
--- a/src/main/java/org/elasticsearch/search/query/QuerySearchResult.java
+++ b/src/main/java/org/elasticsearch/search/query/QuerySearchResult.java
@@ -25,6 +25,7 @@ import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.facet.Facets;
import org.elasticsearch.search.facet.InternalFacets;
+import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.transport.TransportResponse;
import java.io.IOException;
@@ -43,6 +44,7 @@ public class QuerySearchResult extends TransportResponse implements QuerySearchR
private int size;
private TopDocs topDocs;
private InternalFacets facets;
+ private Suggest suggest;
private boolean searchTimedOut;
public QuerySearchResult() {
@@ -101,6 +103,14 @@ public class QuerySearchResult extends TransportResponse implements QuerySearchR
this.facets = facets;
}
+ public Suggest suggest() {
+ return suggest;
+ }
+
+ public void suggest(Suggest suggest) {
+ this.suggest = suggest;
+ }
+
public int from() {
return from;
}
@@ -136,6 +146,9 @@ public class QuerySearchResult extends TransportResponse implements QuerySearchR
if (in.readBoolean()) {
facets = InternalFacets.readFacets(in);
}
+ if (in.readBoolean()) {
+ suggest = Suggest.readSuggest(in);
+ }
searchTimedOut = in.readBoolean();
}
@@ -153,6 +166,12 @@ public class QuerySearchResult extends TransportResponse implements QuerySearchR
out.writeBoolean(true);
facets.writeTo(out);
}
+ if (suggest == null) {
+ out.writeBoolean(false);
+ } else {
+ out.writeBoolean(true);
+ suggest.writeTo(out);
+ }
out.writeBoolean(searchTimedOut);
}
}
diff --git a/src/main/java/org/elasticsearch/search/suggest/Suggest.java b/src/main/java/org/elasticsearch/search/suggest/Suggest.java
new file mode 100644
index 00000000000..ab594ca97f6
--- /dev/null
+++ b/src/main/java/org/elasticsearch/search/suggest/Suggest.java
@@ -0,0 +1,509 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest;
+
+import org.elasticsearch.ElasticSearchException;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Streamable;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentBuilderString;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Top level suggest result, containing the result for each suggestion.
+ */
+public class Suggest implements Iterable, Streamable, ToXContent {
+
+ static class Fields {
+
+ static final XContentBuilderString SUGGEST = new XContentBuilderString("suggest");
+
+ }
+
+ private List suggestions;
+
+ Suggest() {
+ }
+
+ public Suggest(List suggestions) {
+ this.suggestions = suggestions;
+ }
+
+ /**
+ * @return the suggestions
+ */
+ public List getSuggestions() {
+ return suggestions;
+ }
+
+ @Override
+ public Iterator iterator() {
+ return suggestions.iterator();
+ }
+
+ @Override
+ public void readFrom(StreamInput in) throws IOException {
+ int size = in.readVInt();
+ suggestions = new ArrayList(size);
+ for (int i = 0; i < size; i++) {
+ Suggestion suggestion = new Suggestion();
+ suggestion.readFrom(in);
+ suggestions.add(suggestion);
+ }
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeVInt(suggestions.size());
+ for (Suggestion command : suggestions) {
+ command.writeTo(out);
+ }
+ }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject(Fields.SUGGEST);
+        for (Suggestion suggestion : suggestions) {
+            suggestion.toXContent(builder, params);
+        }
+        builder.endObject();
+        // ToXContent contract: hand the builder back so callers can keep chaining.
+        return builder;
+    }
+
+ public static Suggest readSuggest(StreamInput in) throws IOException {
+ Suggest result = new Suggest();
+ result.readFrom(in);
+ return result;
+ }
+
+ /**
+ * The suggestion responses corresponding with the suggestions in the request.
+ */
+ public static class Suggestion implements Streamable, ToXContent {
+
+ static class Fields {
+
+ static final XContentBuilderString TERMS = new XContentBuilderString("terms");
+
+ }
+
+ private String name;
+ private int size;
+ private Sort sort;
+ private final List terms = new ArrayList(5);
+
+ Suggestion() {
+ }
+
+ Suggestion(String name, int size, Sort sort) {
+ this.name = name;
+ this.size = size; // The suggested term size specified in request, only used for merging shard responses
+ this.sort = sort;
+ }
+
+ void addTerm(Term term) {
+ terms.add(term);
+ }
+
+ /**
+ * @return The terms outputted by the suggest analyzer using the suggested text. Embeds the actual suggested
+ * terms.
+ */
+ public List getTerms() {
+ return terms;
+ }
+
+ /**
+ * @return The name of the suggestion as is defined in the request.
+ */
+ public String getName() {
+ return name;
+ }
+
+ /**
+ * Merges the result of another suggestion into this suggestion.
+ */
+ public void reduce(Suggestion other) {
+ assert name.equals(other.name);
+ assert terms.size() == other.terms.size();
+ for (int i = 0; i < terms.size(); i++) {
+ Term thisTerm = terms.get(i);
+ Term otherTerm = other.terms.get(i);
+ thisTerm.reduce(otherTerm, sort);
+ }
+ }
+
+ /**
+ * Trims the number of suggestions per suggest text term to the requested size.
+ */
+ public void trim() {
+ for (Term term : terms) {
+ term.trim(size);
+ }
+ }
+
+    @Override
+    public void readFrom(StreamInput in) throws IOException {
+        name = in.readString();
+        size = in.readVInt();
+        sort = Sort.fromId(in.readByte());
+        int termsSize = in.readVInt(); // count of serialized terms; distinct from the `size` field above
+        terms.clear();
+        for (int i = 0; i < termsSize; i++) {
+            terms.add(Term.read(in));
+        }
+    }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeString(name);
+ out.writeVInt(size);
+ out.writeByte(sort.id());
+ out.writeVInt(terms.size());
+ for (Term term : terms) {
+ term.writeTo(out);
+ }
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject(name);
+ builder.startArray(Fields.TERMS);
+ for (Term term : terms) {
+ term.toXContent(builder, params);
+ }
+ builder.endArray();
+ builder.endObject();
+ return builder;
+ }
+
+
+ /**
+ * Represents a term from the suggest text, that contains the term, start/end offsets and zero or more suggested
+ * terms for this term in the suggested text.
+ */
+ public static class Term implements Streamable, ToXContent {
+
+ static class Fields {
+
+ static final XContentBuilderString TERM = new XContentBuilderString("term");
+ static final XContentBuilderString SUGGESTIONS = new XContentBuilderString("suggestions");
+ static final XContentBuilderString START_OFFSET = new XContentBuilderString("start_offset");
+ static final XContentBuilderString END_OFFSET = new XContentBuilderString("end_offset");
+
+ }
+
+ private Text term;
+ private int startOffset;
+ private int endOffset;
+
+ private List suggested;
+
+ public Term(Text term, int startOffset, int endOffset) {
+ this.term = term;
+ this.startOffset = startOffset;
+ this.endOffset = endOffset;
+ this.suggested = new ArrayList(5);
+ }
+
+ Term() {
+ }
+
+ void addSuggested(SuggestedTerm suggestedTerm) {
+ suggested.add(suggestedTerm);
+ }
+
+ void reduce(Term otherTerm, Sort sort) {
+ assert term.equals(otherTerm.term());
+ assert startOffset == otherTerm.startOffset;
+ assert endOffset == otherTerm.endOffset;
+
+ for (SuggestedTerm otherSuggestedTerm : otherTerm.suggested) {
+ int index = suggested.indexOf(otherSuggestedTerm);
+ if (index >= 0) {
+ SuggestedTerm thisSuggestedTerm = suggested.get(index);
+ thisSuggestedTerm.setFrequency(thisSuggestedTerm.frequency + otherSuggestedTerm.frequency);
+ } else {
+ suggested.add(otherSuggestedTerm);
+ }
+ }
+
+ Comparator comparator;
+ switch (sort) {
+ case SCORE:
+ comparator = SuggestPhase.SCORE;
+ break;
+ case FREQUENCY:
+ comparator = SuggestPhase.FREQUENCY;
+ break;
+ default:
+ throw new ElasticSearchException("Could not resolve comparator in reduce phase.");
+ }
+ Collections.sort(suggested, comparator);
+ }
+
+ public Text term() {
+ return term;
+ }
+
+ /**
+ * @return the term (analyzed by suggest analyzer) originating from the suggest text.
+ */
+ public String getTerm() {
+ return term().string();
+ }
+
+ /**
+ * @return the start offset of this term in the suggest text.
+ */
+ public int getStartOffset() {
+ return startOffset;
+ }
+
+ /**
+ * @return the end offset of this term in the suggest text.
+ */
+ public int getEndOffset() {
+ return endOffset;
+ }
+
+ /**
+ * @return The suggested terms for this particular suggest text term. If there are no suggested terms then
+ * an empty list is returned.
+ */
+ public List getSuggested() {
+ return suggested;
+ }
+
+ void trim(int size) {
+ int suggestionsToRemove = Math.max(0, suggested.size() - size);
+ for (int i = 0; i < suggestionsToRemove; i++) {
+ suggested.remove(suggested.size() - 1);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ Term term = (Term) o;
+
+ if (endOffset != term.endOffset) return false;
+ if (startOffset != term.startOffset) return false;
+ if (!this.term.equals(term.term)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = term.hashCode();
+ result = 31 * result + startOffset;
+ result = 31 * result + endOffset;
+ return result;
+ }
+
+ static Term read(StreamInput in) throws IOException {
+ Term term = new Term();
+ term.readFrom(in);
+ return term;
+ }
+
+ @Override
+ public void readFrom(StreamInput in) throws IOException {
+ term = in.readText();
+ startOffset = in.readVInt();
+ endOffset = in.readVInt();
+ int suggestedWords = in.readVInt();
+ suggested = new ArrayList(suggestedWords);
+ for (int j = 0; j < suggestedWords; j++) {
+ suggested.add(SuggestedTerm.create(in));
+ }
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeText(term);
+ out.writeVInt(startOffset);
+ out.writeVInt(endOffset);
+ out.writeVInt(suggested.size());
+ for (SuggestedTerm suggestedTerm : suggested) {
+ suggestedTerm.writeTo(out);
+ }
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject();
+ builder.field(Fields.TERM, term);
+ builder.field(Fields.START_OFFSET, startOffset);
+ builder.field(Fields.END_OFFSET, endOffset);
+ builder.startArray(Fields.SUGGESTIONS);
+ for (SuggestedTerm suggestedTerm : suggested) {
+ suggestedTerm.toXContent(builder, params);
+ }
+ builder.endArray();
+ builder.endObject();
+ return builder;
+ }
+
+ /**
+ * Represents the suggested term, containing a term and its document frequency and score.
+ */
+ public static class SuggestedTerm implements Streamable, ToXContent {
+
+ static class Fields {
+
+ static final XContentBuilderString TERM = new XContentBuilderString("term");
+ static final XContentBuilderString FREQUENCY = new XContentBuilderString("frequency");
+ static final XContentBuilderString SCORE = new XContentBuilderString("score");
+
+ }
+
+ private Text term;
+ private int frequency;
+ private float score;
+
+ SuggestedTerm(Text term, int frequency, float score) {
+ this.term = term;
+ this.frequency = frequency;
+ this.score = score;
+ }
+
+ SuggestedTerm() {
+ }
+
+ public void setFrequency(int frequency) {
+ this.frequency = frequency;
+ }
+
+ /**
+ * @return The actual term.
+ */
+ public Text getTerm() {
+ return term;
+ }
+
+ /**
+ * @return How often this suggested term appears in the index.
+ */
+ public int getFrequency() {
+ return frequency;
+ }
+
+ /**
+ * @return The score based on the edit distance difference between the suggested term and the
+ * term in the suggest text.
+ */
+ public float getScore() {
+ return score;
+ }
+
+ static SuggestedTerm create(StreamInput in) throws IOException {
+ SuggestedTerm suggestion = new SuggestedTerm();
+ suggestion.readFrom(in);
+ return suggestion;
+ }
+
+ @Override
+ public void readFrom(StreamInput in) throws IOException {
+ term = in.readText();
+ frequency = in.readVInt();
+ score = in.readFloat();
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeText(term);
+ out.writeVInt(frequency);
+ out.writeFloat(score);
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject();
+ builder.field(Fields.TERM, term);
+ builder.field(Fields.FREQUENCY, frequency);
+ builder.field(Fields.SCORE, score);
+ builder.endObject();
+ return builder;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ SuggestedTerm that = (SuggestedTerm) o;
+ return term.equals(that.term);
+
+ }
+
+ @Override
+ public int hashCode() {
+ return term.hashCode();
+ }
+ }
+
+ }
+
+ enum Sort {
+
+ /**
+ * Sort should first be based on score.
+ */
+ SCORE((byte) 0x0),
+
+ /**
+ * Sort should first be based on document frequency.
+ */
+ FREQUENCY((byte) 0x1);
+
+ private byte id;
+
+ private Sort(byte id) {
+ this.id = id;
+ }
+
+ public byte id() {
+ return id;
+ }
+
+ static Sort fromId(byte id) {
+ if (id == 0) {
+ return SCORE;
+ } else if (id == 1) {
+ return FREQUENCY;
+ } else {
+ throw new ElasticSearchException("Illegal suggest sort " + id);
+ }
+ }
+
+ }
+
+ }
+
+}
diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java b/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java
new file mode 100644
index 00000000000..55f8213078a
--- /dev/null
+++ b/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java
@@ -0,0 +1,383 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest;
+
+import org.elasticsearch.ElasticSearchIllegalArgumentException;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Defines how to perform suggesting. This builders allows a number of global options to be specified and
+ * an arbitrary number of {@link org.elasticsearch.search.suggest.SuggestBuilder.FuzzySuggestion} instances.
+ *
+ * Suggesting works by suggesting terms that appear in the suggest text that are similar compared to the terms in
+ * provided text. These spelling suggestions are based on several options described in this class.
+ */
+public class SuggestBuilder implements ToXContent {
+
+ private String globalText;
+
+ private final List suggestions = new ArrayList();
+
+ /**
+ * Sets the text to provide suggestions for. The suggest text is a required option that needs
+ * to be set either via this setter or via the {@link org.elasticsearch.search.suggest.SuggestBuilder.Suggestion#setText(String)} method.
+ *
+ * The suggest text gets analyzed by the suggest analyzer or the suggest field search analyzer.
+ * For each analyzed token, suggested terms are suggested if possible.
+ */
+ public SuggestBuilder setText(String globalText) {
+ this.globalText = globalText;
+ return this;
+ }
+
+ /**
+ * Adds an {@link org.elasticsearch.search.suggest.SuggestBuilder.FuzzySuggestion} instance under a user defined name.
+     * The order in which the <tt>Suggestions</tt> are added is the same as in the response.
+ */
+ public SuggestBuilder addSuggestion(Suggestion suggestion) {
+ suggestions.add(suggestion);
+ return this;
+ }
+
+ /**
+ * Returns all suggestions with the defined names.
+ */
+ public List getSuggestion() {
+ return suggestions;
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject("suggest");
+ if (globalText != null) {
+ builder.field("text", globalText);
+ }
+
+ builder.startObject("suggestions");
+ for (Suggestion suggestion : suggestions) {
+ builder = suggestion.toXContent(builder, params);
+ }
+ builder.endObject();
+
+ builder.endObject();
+ return builder;
+ }
+
+ /**
+ * Convenience factory method.
+ *
+ * @param name The name of this suggestion. This is a required parameter.
+ */
+ public static FuzzySuggestion fuzzySuggestion(String name) {
+ return new FuzzySuggestion(name);
+ }
+
+ public static abstract class Suggestion implements ToXContent {
+
+ private String name;
+ private String suggester;
+ private String text;
+
+ public Suggestion(String name, String suggester) {
+ this.name = name;
+ this.suggester = suggester;
+ }
+
+ /**
+ * Same as in {@link SuggestBuilder#setText(String)}, but in the suggestion scope.
+ */
+ public T setText(String text) {
+ this.text = text;
+ return (T) this;
+ }
+
+ @Override
+ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+ builder.startObject(name);
+ if (suggester != null) {
+ builder.field("suggester", suggester);
+ }
+ if (text != null) {
+ builder.field("text", text);
+ }
+ builder = innerToXContent(builder, params);
+ builder.endObject();
+ return builder;
+ }
+
+ protected abstract XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException;
+ }
+
+ /**
+ * Defines the actual suggest command. Each command uses the global options unless defined in the suggestion itself.
+ * All options are the same as the global options, but are only applicable for this suggestion.
+ */
+ public static class FuzzySuggestion extends Suggestion {
+
+ private String field;
+ private String analyzer;
+ private String suggestMode;
+ private Float accuracy;
+ private Integer size;
+ private String sort;
+ private String stringDistance;
+ private Boolean lowerCaseTerms;
+ private Integer maxEdits;
+ private Integer factor;
+ private Float maxTermFreq;
+ private Integer prefixLength;
+ private Integer minWordLength;
+ private Float minDocFreq;
+ private Integer shardSize;
+
+ /**
+ * @param name The name of this suggestion. This is a required parameter.
+ */
+ public FuzzySuggestion(String name) {
+ super(name, "fuzzy");
+ }
+
+ /**
+ * Sets from what field to fetch the candidate suggestions from. This is an required option and needs to be set
+ * via this setter or {@link org.elasticsearch.search.suggest.SuggestBuilder.FuzzySuggestion#setField(String)} method
+ */
+ public FuzzySuggestion setField(String field) {
+ this.field = field;
+ return this;
+ }
+
+ /**
+ * Sets the analyzer to analyse to suggest text with. Defaults to the search analyzer of the suggest field.
+ */
+ public FuzzySuggestion setAnalyzer(String analyzer) {
+ this.analyzer = analyzer;
+ return this;
+ }
+
+ /**
+ * The global suggest mode controls what suggested terms are included or controls for what suggest text tokens,
+ * terms should be suggested for. Three possible values can be specified:
+ *
+ * missing
- Only suggest terms in the suggest text that aren't in the index. This is the default.
+ * popular
- Only suggest terms that occur in more docs then the original suggest text term.
+ * always
- Suggest any matching suggest terms based on tokens in the suggest text.
+ *
+ */
+ public FuzzySuggestion setSuggestMode(String suggestMode) {
+ this.suggestMode = suggestMode;
+ return this;
+ }
+
+ /**
+ * Sets how similar the suggested terms at least need to be compared to the original suggest text tokens.
+ * A value between 0 and 1 can be specified. This value will be compared to the string distance result of each
+ * candidate spelling correction.
+ *
+ * Default is 0.5f.
+ */
+ public FuzzySuggestion setAccuracy(float accuracy) {
+ this.accuracy = accuracy;
+ return this;
+ }
+
+ /**
+ * Sets the maximum suggestions to be returned per suggest text term.
+ */
+ public FuzzySuggestion setSize(int size) {
+ if (size <= 0) {
+ throw new ElasticSearchIllegalArgumentException("Size must be positive");
+ }
+
+ this.size = size;
+ return this;
+ }
+
+ /**
+ * Sets how to sort the suggest terms per suggest text token.
+     * Two possible values:
+     * <ul>
+     * <li><tt>score</tt> - Sort should first be based on score, then document frequency and then the term itself.
+     * <li><tt>frequency</tt> - Sort should first be based on document frequency, then score and then the term itself.
+     * </ul>
+ * What the score is depends on the suggester being used.
+ */
+ public FuzzySuggestion setSort(String sort) {
+ this.sort = sort;
+ return this;
+ }
+
+ /**
+ * Sets what string distance implementation to use for comparing how similar suggested terms are.
+ * Four possible values can be specified:
+ *
+ * internal
- This is the default and is based on damerau_levenshtein
, but
+ * highly optimized for comparing string distance for terms inside the index.
+ * damerau_levenshtein
- String distance algorithm based on Damerau-Levenshtein algorithm.
+ * levenstein
- String distance algorithm based on Levenstein edit distance algorithm.
+ * jarowinkler
- String distance algorithm based on Jaro-Winkler algorithm.
+ * ngram
- String distance algorithm based on n-grams.
+ *
+ */
+ public FuzzySuggestion setStringDistance(String stringDistance) {
+ this.stringDistance = stringDistance;
+ return this;
+ }
+
+ /**
+ * Sets whether to lowercase the suggest text tokens just before suggesting terms.
+ */
+ public FuzzySuggestion setLowerCaseTerms(Boolean lowerCaseTerms) {
+ this.lowerCaseTerms = lowerCaseTerms;
+ return this;
+ }
+
+ /**
+ * Sets the maximum edit distance candidate suggestions can have in order to be considered as a suggestion.
+ * Can only be a value between 1 and 2. Any other value results in a bad request error being thrown. Defaults to 2.
+ */
+ public FuzzySuggestion setMaxEdits(Integer maxEdits) {
+ this.maxEdits = maxEdits;
+ return this;
+ }
+
+ /**
+ * A factor that is used to multiply with the size in order to inspect more candidate suggestions.
+ * Can improve accuracy at the cost of performance. Defaults to 5.
+ */
+ public FuzzySuggestion setFactor(Integer factor) {
+ this.factor = factor;
+ return this;
+ }
+
+ /**
+ * Sets a maximum threshold in number of documents a suggest text token can exist in order to be corrected.
+ * Can be a relative percentage number (e.g 0.4) or an absolute number to represent document frequencies.
+ * If a value higher than 1 is specified then fractional values cannot be specified. Defaults to 0.01f.
+ *
+ * This can be used to exclude high frequency terms from being suggested. High frequency terms are usually
+ * spelled correctly; on top of this, this also improves the suggest performance.
+ */
+ public FuzzySuggestion setMaxTermFreq(float maxTermFreq) {
+ this.maxTermFreq = maxTermFreq;
+ return this;
+ }
+
+ /**
+ * Sets the number of minimal prefix characters that must match in order to be a candidate suggestion.
+ * Defaults to 1. Increasing this number improves suggest performance. Usually misspellings don't occur in the
+ * beginning of terms.
+ */
+ public FuzzySuggestion setPrefixLength(int prefixLength) {
+ this.prefixLength = prefixLength;
+ return this;
+ }
+
+ /**
+ * The minimum length a suggest text term must have in order to be corrected. Defaults to 4.
+ */
+ public FuzzySuggestion setMinWordLength(int minWordLength) {
+ this.minWordLength = minWordLength;
+ return this;
+ }
+
+ /**
+ * Sets a minimal threshold in number of documents a suggested term should appear in. This can be specified as
+ * an absolute number or as a relative percentage of number of documents. This can improve quality by only suggesting
+ * high frequency terms. Defaults to 0f and is not enabled. If a value higher than 1 is specified then the number
+ * cannot be fractional.
+ */
+ public FuzzySuggestion setMinDocFreq(float minDocFreq) {
+ this.minDocFreq = minDocFreq;
+ return this;
+ }
+
+ /**
+ * Sets the maximum number of suggested terms to be retrieved from each individual shard. During the reduce
+ * phase only the top N suggestions are returned based on the <code>size</code> option. Defaults to the
+ * <code>size</code> option.
+ *
+ * Setting this to a value higher than the `size` can be useful in order to get a more accurate document frequency
+ * for suggested terms. Due to the fact that terms are partitioned amongst shards, the shard level document
+ * frequencies of suggestions may not be precise. Increasing this will make these document frequencies
+ * more precise.
+ */
+ public FuzzySuggestion setShardSize(Integer shardSize) {
+ this.shardSize = shardSize;
+ return this;
+ }
+
+ @Override
+ public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
+ if (analyzer != null) {
+ builder.field("analyzer", analyzer);
+ }
+ if (field != null) {
+ builder.field("field", field);
+ }
+ if (suggestMode != null) {
+ builder.field("suggest_mode", suggestMode);
+ }
+ if (accuracy != null) {
+ builder.field("accuracy", accuracy);
+ }
+ if (size != null) {
+ builder.field("size", size);
+ }
+ if (sort != null) {
+ builder.field("sort", sort);
+ }
+ if (stringDistance != null) {
+ builder.field("string_distance", stringDistance);
+ }
+ if (lowerCaseTerms != null) {
+ builder.field("lowercase_terms", lowerCaseTerms);
+ }
+ if (maxEdits != null) {
+ builder.field("max_edits", maxEdits);
+ }
+ if (factor != null) {
+ builder.field("factor", factor);
+ }
+ if (maxTermFreq != null) {
+ builder.field("max_term_freq", maxTermFreq);
+ }
+ if (prefixLength != null) {
+ builder.field("prefix_length", prefixLength);
+ }
+ if (minWordLength != null) {
+ builder.field("min_word_len", minWordLength);
+ }
+ if (minDocFreq != null) {
+ builder.field("min_doc_freq", minDocFreq);
+ }
+ if (shardSize != null) {
+ builder.field("shard_size", shardSize);
+ }
+ return builder;
+ }
+ }
+
+}
diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java b/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java
new file mode 100644
index 00000000000..bfb60477b62
--- /dev/null
+++ b/src/main/java/org/elasticsearch/search/suggest/SuggestParseElement.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.spell.*;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.LevenshteinAutomata;
+import org.elasticsearch.ElasticSearchIllegalArgumentException;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.search.SearchParseElement;
+import org.elasticsearch.search.internal.SearchContext;
+
+/**
+ *
+ */
+public class SuggestParseElement implements SearchParseElement {
+
+ @Override
+ public void parse(XContentParser parser, SearchContext context) throws Exception {
+ SuggestionSearchContext suggestionSearchContext = new SuggestionSearchContext();
+
+ BytesRef globalText = null;
+
+ Analyzer defaultAnalyzer = context.mapperService().searchAnalyzer();
+ float defaultAccuracy = SpellChecker.DEFAULT_ACCURACY;
+ int defaultSize = 5;
+ SuggestMode defaultSuggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
+ Suggest.Suggestion.Sort defaultSort = Suggest.Suggestion.Sort.SCORE;
+ StringDistance defaultStringDistance = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
+ boolean defaultLowerCaseTerms = false; // changed from Lucene default because we rely on search analyzer to properly handle it
+ int defaultMaxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
+ int defaultFactor = 5;
+ float defaultMaxTermFreq = 0.01f;
+ int defaultPrefixLength = 1;
+ int defaultMinQueryLength = 4;
+ float defaultMinDocFreq = 0f;
+
+ String fieldName = null;
+ XContentParser.Token token;
+ while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (token == XContentParser.Token.FIELD_NAME) {
+ fieldName = parser.currentName();
+ } else if (token.isValue()) {
+ if ("text".equals(fieldName)) {
+ globalText = parser.bytes();
+ } else {
+ throw new ElasticSearchIllegalArgumentException("[suggest] does not support [" + fieldName + "]");
+ }
+ } else if (token == XContentParser.Token.START_OBJECT) {
+ // TODO: Once we have more suggester impls we need to have different parsing logic per suggester.
+ // This code is now specific for the fuzzy suggester
+ if ("suggestions".equals(fieldName)) {
+ while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (token == XContentParser.Token.FIELD_NAME) {
+ fieldName = parser.currentName();
+ } else if (token == XContentParser.Token.START_OBJECT) {
+ SuggestionSearchContext.Suggestion suggestion = new SuggestionSearchContext.Suggestion();
+ suggestionSearchContext.addSuggestion(fieldName, suggestion);
+
+ while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+ if (token == XContentParser.Token.FIELD_NAME) {
+ fieldName = parser.currentName();
+ } else if (token.isValue()) {
+ if ("suggester".equals(fieldName)) {
+ suggestion.suggester(parser.text());
+ } else if ("analyzer".equals(fieldName)) {
+ String analyzerName = parser.text();
+ Analyzer analyzer = context.mapperService().analysisService().analyzer(analyzerName);
+ if (analyzer == null) {
+ throw new ElasticSearchIllegalArgumentException("Analyzer [" + analyzerName + "] doesn't exists");
+ }
+ suggestion.analyzer(analyzer);
+ } else if ("text".equals(fieldName)) {
+ suggestion.text(parser.bytes());
+ } else if ("field".equals(fieldName)) {
+ suggestion.setField(parser.text());
+ } else if ("accuracy".equals(fieldName)) {
+ suggestion.accuracy(parser.floatValue());
+ } else if ("size".equals(fieldName)) {
+ suggestion.size(parser.intValue());
+ } else if ("suggest_mode".equals(fieldName) || "suggestMode".equals(fieldName)) {
+ suggestion.suggestMode(resolveSuggestMode(parser.text()));
+ } else if ("sort".equals(fieldName)) {
+ suggestion.sort(resolveSort(parser.text()));
+ } else if ("string_distance".equals(fieldName) || "stringDistance".equals(fieldName)) {
+ suggestion.stringDistance(resolveDistance(parser.text()));
+ } else if ("lowercase_terms".equals(fieldName) || "lowercaseTerms".equals(fieldName)) {
+ suggestion.lowerCaseTerms(parser.booleanValue());
+ } else if ("max_edits".equals(fieldName) || "maxEdits".equals(fieldName) || "fuzziness".equals(fieldName)) {
+ suggestion.maxEdits(parser.intValue());
+ if (suggestion.maxEdits() < 1 || suggestion.maxEdits() > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
+ throw new ElasticSearchIllegalArgumentException("Illegal max_edits value " + suggestion.maxEdits());
+ }
+ } else if ("factor".equals(fieldName)) {
+ suggestion.factor(parser.intValue());
+ } else if ("max_term_freq".equals(fieldName) || "maxTermFreq".equals(fieldName)) {
+ suggestion.maxTermFreq(parser.floatValue());
+ } else if ("prefix_length".equals(fieldName) || "prefixLength".equals(fieldName)) {
+ suggestion.prefixLength(parser.intValue());
+ } else if ("min_word_len".equals(fieldName) || "minWordLen".equals(fieldName)) {
+ suggestion.minQueryLength(parser.intValue());
+ } else if ("min_doc_freq".equals(fieldName) || "minDocFreq".equals(fieldName)) {
+ suggestion.minDocFreq(parser.floatValue());
+ } else if ("shard_size".equals(fieldName) || "shardSize".equals(fieldName)) {
+ suggestion.shardSize(parser.intValue());
+ } else {
+ throw new ElasticSearchIllegalArgumentException("suggester[fuzzy] doesn't support [" + fieldName + "]");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Verify options and set defaults
+ for (SuggestionSearchContext.Suggestion command : suggestionSearchContext.suggestions().values()) {
+ if (command.suggester() == null) {
+ throw new ElasticSearchIllegalArgumentException("The required suggester option is missing");
+ }
+ if (command.field() == null) {
+ throw new ElasticSearchIllegalArgumentException("The required field option is missing");
+ }
+
+ if (command.text() == null) {
+ if (globalText == null) {
+ throw new ElasticSearchIllegalArgumentException("The required text option is missing");
+ }
+
+ command.text(globalText);
+ }
+ if (command.analyzer() == null) {
+ command.analyzer(defaultAnalyzer);
+ }
+ if (command.accuracy() == null) {
+ command.accuracy(defaultAccuracy);
+ }
+ if (command.size() == null) {
+ command.size(defaultSize);
+ }
+ if (command.suggestMode() == null) {
+ command.suggestMode(defaultSuggestMode);
+ }
+ if (command.sort() == null) {
+ command.sort(defaultSort);
+ }
+ if (command.stringDistance() == null) {
+ command.stringDistance(defaultStringDistance);
+ }
+ if (command.lowerCaseTerms() == null) {
+ command.lowerCaseTerms(defaultLowerCaseTerms);
+ }
+ if (command.maxEdits() == null) {
+ command.maxEdits(defaultMaxEdits);
+ }
+ if (command.factor() == null) {
+ command.factor(defaultFactor);
+ }
+ if (command.maxTermFreq() == null) {
+ command.maxTermFreq(defaultMaxTermFreq);
+ }
+ if (command.prefixLength() == null) {
+ command.prefixLength(defaultPrefixLength);
+ }
+ if (command.minWordLength() == null) {
+ command.minQueryLength(defaultMinQueryLength);
+ }
+ if (command.minDocFreq() == null) {
+ command.minDocFreq(defaultMinDocFreq);
+ }
+ if (command.shardSize() == null) {
+ command.shardSize(defaultSize);
+ }
+ }
+ context.suggest(suggestionSearchContext);
+ }
+
+ private SuggestMode resolveSuggestMode(String sortVal) {
+ if ("missing".equals(sortVal)) {
+ return SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
+ } else if ("popular".equals(sortVal)) {
+ return SuggestMode.SUGGEST_MORE_POPULAR;
+ } else if ("always".equals(sortVal)) {
+ return SuggestMode.SUGGEST_ALWAYS;
+ } else {
+ throw new ElasticSearchIllegalArgumentException("Illegal suggest mode " + sortVal);
+ }
+ }
+
+ private Suggest.Suggestion.Sort resolveSort(String sortVal) {
+ if ("score".equals(sortVal)) {
+ return Suggest.Suggestion.Sort.SCORE;
+ } else if ("frequency".equals(sortVal)) {
+ return Suggest.Suggestion.Sort.FREQUENCY;
+ } else {
+ throw new ElasticSearchIllegalArgumentException("Illegal suggest sort " + sortVal);
+ }
+ }
+
+ private StringDistance resolveDistance(String distanceVal) {
+ if ("internal".equals(distanceVal)) {
+ return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
+ } else if ("damerau_levenshtein".equals(distanceVal)) {
+ return new LuceneLevenshteinDistance();
+ } else if ("levenstein".equals(distanceVal)) {
+ return new LevensteinDistance();
+ } else if ("jarowinkler".equals(distanceVal)) {
+ return new JaroWinklerDistance();
+ } else if ("ngram".equals(distanceVal)) {
+ return new NGramDistance();
+ } else {
+ throw new ElasticSearchIllegalArgumentException("Illegal distance option " + distanceVal);
+ }
+ }
+
+}
diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestPhase.java b/src/main/java/org/elasticsearch/search/suggest/SuggestPhase.java
new file mode 100644
index 00000000000..811c01eca23
--- /dev/null
+++ b/src/main/java/org/elasticsearch/search/suggest/SuggestPhase.java
@@ -0,0 +1,231 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spell.DirectSpellChecker;
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
+import org.apache.lucene.search.spell.SuggestWordQueue;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.UnicodeUtil;
+import org.elasticsearch.ElasticSearchException;
+import org.elasticsearch.ElasticSearchIllegalArgumentException;
+import org.elasticsearch.common.bytes.BytesArray;
+import org.elasticsearch.common.component.AbstractComponent;
+import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.io.FastCharArrayReader;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.text.BytesText;
+import org.elasticsearch.common.text.StringText;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.search.SearchParseElement;
+import org.elasticsearch.search.SearchPhase;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.search.suggest.Suggest.Suggestion;
+
+/**
+ */
+public class SuggestPhase extends AbstractComponent implements SearchPhase {
+
+ @Inject
+ public SuggestPhase(Settings settings) {
+ super(settings);
+ }
+
+ @Override
+ public Map parseElements() {
+ ImmutableMap.Builder parseElements = ImmutableMap.builder();
+ parseElements.put("suggest", new SuggestParseElement());
+ return parseElements.build();
+ }
+
+ @Override
+ public void preProcess(SearchContext context) {
+ }
+
+ @Override
+ public void execute(SearchContext context) throws ElasticSearchException {
+ SuggestionSearchContext suggest = context.suggest();
+ if (suggest == null) {
+ return;
+ }
+
+ try {
+ CharsRef spare = new CharsRef(); // Maybe add CharsRef to CacheRecycler?
+ List suggestions = new ArrayList(2);
+ for (Map.Entry entry : suggest.suggestions().entrySet()) {
+ SuggestionSearchContext.Suggestion suggestion = entry.getValue();
+ if ("fuzzy".equals(suggestion.suggester())) {
+ suggestions.add(executeDirectSpellChecker(entry.getKey(), suggestion, context, spare));
+ } else {
+ throw new ElasticSearchIllegalArgumentException("Unsupported suggester[" + suggestion.suggester() + "]");
+ }
+ }
+ context.queryResult().suggest(new Suggest(suggestions));
+ } catch (IOException e) {
+ throw new ElasticSearchException("I/O exception during suggest phase", e);
+ }
+ }
+
+ private Suggestion executeDirectSpellChecker(String name, SuggestionSearchContext.Suggestion suggestion, SearchContext context, CharsRef spare) throws IOException {
+ DirectSpellChecker directSpellChecker = new DirectSpellChecker();
+ directSpellChecker.setAccuracy(suggestion.accuracy());
+ Comparator comparator;
+ switch (suggestion.sort()) {
+ case SCORE:
+ comparator = SuggestWordQueue.DEFAULT_COMPARATOR;
+ break;
+ case FREQUENCY:
+ comparator = LUCENE_FREQUENCY;
+ break;
+ default:
+ throw new ElasticSearchIllegalArgumentException("Illegal suggest sort: " + suggestion.sort());
+ }
+ directSpellChecker.setComparator(comparator);
+ directSpellChecker.setDistance(suggestion.stringDistance());
+ directSpellChecker.setLowerCaseTerms(suggestion.lowerCaseTerms());
+ directSpellChecker.setMaxEdits(suggestion.maxEdits());
+ directSpellChecker.setMaxInspections(suggestion.factor());
+ directSpellChecker.setMaxQueryFrequency(suggestion.maxTermFreq());
+ directSpellChecker.setMinPrefix(suggestion.prefixLength());
+ directSpellChecker.setMinQueryLength(suggestion.minWordLength());
+ directSpellChecker.setThresholdFrequency(suggestion.minDocFreq());
+
+ Suggestion response = new Suggestion(
+ name, suggestion.size(), suggestion.sort()
+ );
+ List tokens = queryTerms(suggestion, spare);
+ for (Token token : tokens) {
+ IndexReader indexReader = context.searcher().getIndexReader();
+ // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
+ SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(
+ token.term, suggestion.shardSize(), indexReader, suggestion.suggestMode()
+ );
+ Text key = new BytesText(new BytesArray(token.term.bytes()));
+ Suggestion.Term resultTerm = new Suggestion.Term(key, token.startOffset, token.endOffset);
+ for (SuggestWord suggestWord : suggestedWords) {
+ Text word = new StringText(suggestWord.string);
+ resultTerm.addSuggested(new Suggestion.Term.SuggestedTerm(word, suggestWord.freq, suggestWord.score));
+ }
+ response.addTerm(resultTerm);
+ }
+ return response;
+ }
+
+ private List queryTerms(SuggestionSearchContext.Suggestion suggestion, CharsRef spare) throws IOException {
+ UnicodeUtil.UTF8toUTF16(suggestion.text(), spare);
+ TokenStream ts = suggestion.analyzer().tokenStream(
+ suggestion.field(), new FastCharArrayReader(spare.chars, spare.offset, spare.length)
+ );
+ ts.reset();
+
+ TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+ OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
+ BytesRef termRef = termAtt.getBytesRef();
+
+ List result = new ArrayList(5);
+ while (ts.incrementToken()) {
+ termAtt.fillBytesRef();
+ Term term = new Term(suggestion.field(), BytesRef.deepCopyOf(termRef));
+ result.add(new Token(term, offsetAtt.startOffset(), offsetAtt.endOffset()));
+ }
+ return result;
+ }
+
+ private static Comparator LUCENE_FREQUENCY = new SuggestWordFrequencyComparator();
+ public static Comparator SCORE = new Score();
+ public static Comparator FREQUENCY = new Frequency();
+
+ // Same behaviour as comparators in suggest module, but for SuggestedWord
+ // Highest score first, then highest freq first, then lowest term first
+ public static class Score implements Comparator {
+
+ @Override
+ public int compare(Suggestion.Term.SuggestedTerm first, Suggestion.Term.SuggestedTerm second) {
+ // first criteria: the distance
+ int cmp = Float.compare(second.getScore(), first.getScore());
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ // second criteria (if first criteria is equal): the popularity
+ cmp = second.getFrequency() - first.getFrequency();
+ if (cmp != 0) {
+ return cmp;
+ }
+ // third criteria: term text
+ return first.getTerm().compareTo(second.getTerm());
+ }
+
+ }
+
+ // Same behaviour as comparators in suggest module, but for SuggestedWord
+ // Highest freq first, then highest score first, then lowest term first
+ public static class Frequency implements Comparator {
+
+ @Override
+ public int compare(Suggestion.Term.SuggestedTerm first, Suggestion.Term.SuggestedTerm second) {
+ // first criteria: the popularity
+ int cmp = second.getFrequency() - first.getFrequency();
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ // second criteria (if first criteria is equal): the distance
+ cmp = Float.compare(second.getScore(), first.getScore());
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ // third criteria: term text
+ return first.getTerm().compareTo(second.getTerm());
+ }
+
+ }
+
+ private static class Token {
+
+ public final Term term;
+ public final int startOffset;
+ public final int endOffset;
+
+ private Token(Term term, int startOffset, int endOffset) {
+ this.term = term;
+ this.startOffset = startOffset;
+ this.endOffset = endOffset;
+ }
+
+ }
+
+}
diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java b/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java
new file mode 100644
index 00000000000..54952ed76d9
--- /dev/null
+++ b/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.suggest;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.spell.StringDistance;
+import org.apache.lucene.search.spell.SuggestMode;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.ElasticSearchIllegalArgumentException;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ */
+public class SuggestionSearchContext {
+
+ private final Map suggestions = new LinkedHashMap(4);
+
+ public void addSuggestion(String name, Suggestion suggestion) {
+ suggestions.put(name, suggestion);
+ }
+
+ public Map suggestions() {
+ return suggestions;
+ }
+
+ public static class Suggestion {
+
+ private String suggester;
+ private BytesRef text;
+ private String field;
+ private Analyzer analyzer;
+ private SuggestMode suggestMode;
+ private Float accuracy;
+ private Integer size;
+ private Suggest.Suggestion.Sort sort;
+ private StringDistance stringDistance;
+ private Boolean lowerCaseTerms;
+ private Integer maxEdits;
+ private Integer factor;
+ private Float maxTermFreq;
+ private Integer prefixLength;
+ private Integer minWordLength;
+ private Float minDocFreq;
+ private Integer shardSize;
+
+ public String suggester() {
+ return suggester;
+ }
+
+ public void suggester(String suggester) {
+ this.suggester = suggester;
+ }
+
+ public BytesRef text() {
+ return text;
+ }
+
+ public void text(BytesRef text) {
+ this.text = text;
+ }
+
+ public Analyzer analyzer() {
+ return analyzer;
+ }
+
+ public void analyzer(Analyzer analyzer) {
+ this.analyzer = analyzer;
+ }
+
+ public String field() {
+ return field;
+ }
+
+ public void setField(String field) {
+ this.field = field;
+ }
+
+ public SuggestMode suggestMode() {
+ return suggestMode;
+ }
+
+ public void suggestMode(SuggestMode suggestMode) {
+ this.suggestMode = suggestMode;
+ }
+
+ public Float accuracy() {
+ return accuracy;
+ }
+
+ public void accuracy(float accuracy) {
+ this.accuracy = accuracy;
+ }
+
+ public Integer size() {
+ return size;
+ }
+
+ public void size(int size) {
+ if (size <= 0) {
+ throw new ElasticSearchIllegalArgumentException("Size must be positive");
+ }
+
+ this.size = size;
+ }
+
+ public Suggest.Suggestion.Sort sort() {
+ return sort;
+ }
+
+ public void sort(Suggest.Suggestion.Sort sort) {
+ this.sort = sort;
+ }
+
+ public StringDistance stringDistance() {
+ return stringDistance;
+ }
+
+ public void stringDistance(StringDistance distance) {
+ this.stringDistance = distance;
+ }
+
+ public Boolean lowerCaseTerms() {
+ return lowerCaseTerms;
+ }
+
+ public void lowerCaseTerms(boolean lowerCaseTerms) {
+ this.lowerCaseTerms = lowerCaseTerms;
+ }
+
+ public Integer maxEdits() {
+ return maxEdits;
+ }
+
+ public void maxEdits(int maxEdits) {
+ this.maxEdits = maxEdits;
+ }
+
+ public Integer factor() {
+ return factor;
+ }
+
+ public void factor(int factor) {
+ this.factor = factor;
+ }
+
+ public Float maxTermFreq() {
+ return maxTermFreq;
+ }
+
+ public void maxTermFreq(float maxTermFreq) {
+ this.maxTermFreq = maxTermFreq;
+ }
+
+ public Integer prefixLength() {
+ return prefixLength;
+ }
+
+ public void prefixLength(int prefixLength) {
+ this.prefixLength = prefixLength;
+ }
+
+ public Integer minWordLength() {
+ return minWordLength;
+ }
+
+ public void minQueryLength(int minQueryLength) {
+ this.minWordLength = minQueryLength;
+ }
+
+ public Float minDocFreq() {
+ return minDocFreq;
+ }
+
+ public void minDocFreq(float minDocFreq) {
+ this.minDocFreq = minDocFreq;
+ }
+
+ public Integer shardSize() {
+ return shardSize;
+ }
+
+ public void shardSize(Integer shardSize) {
+ this.shardSize = shardSize;
+ }
+ }
+
+}
diff --git a/src/test/java/org/elasticsearch/benchmark/search/SuggestSearchBenchMark.java b/src/test/java/org/elasticsearch/benchmark/search/SuggestSearchBenchMark.java
new file mode 100644
index 00000000000..799b2fccb73
--- /dev/null
+++ b/src/test/java/org/elasticsearch/benchmark/search/SuggestSearchBenchMark.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.benchmark.search;
+
+import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
+import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.bulk.BulkResponse;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.client.Requests;
+import org.elasticsearch.common.StopWatch;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.SizeValue;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.node.Node;
+import org.elasticsearch.search.suggest.Suggest;
+import org.elasticsearch.search.suggest.SuggestBuilder;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
+import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
+import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.elasticsearch.index.query.QueryBuilders.*;
+import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
+
+/**
+ */
+public class SuggestSearchBenchMark {
+
+ public static void main(String[] args) throws Exception {
+ int SEARCH_ITERS = 200;
+
+ Settings settings = settingsBuilder()
+ .put(SETTING_NUMBER_OF_SHARDS, 1)
+ .put(SETTING_NUMBER_OF_REPLICAS, 0)
+ .build();
+
+ Node[] nodes = new Node[1];
+ for (int i = 0; i < nodes.length; i++) {
+ nodes[i] = nodeBuilder().settings(settingsBuilder().put(settings).put("name", "node" + i)).node();
+ }
+
+ Client client = nodes[0].client();
+ try {
+ client.admin().indices().prepareCreate("test").setSettings(settings).addMapping("type1", XContentFactory.jsonBuilder().startObject().startObject("type1")
+ .startObject("_source").field("enabled", false).endObject()
+ .startObject("_all").field("enabled", false).endObject()
+ .startObject("_type").field("index", "no").endObject()
+ .startObject("_id").field("index", "no").endObject()
+ .startObject("properties")
+ .startObject("field").field("type", "string").field("index", "not_analyzed").field("omit_norms", true).endObject()
+ .endObject()
+ .endObject().endObject()).execute().actionGet();
+ ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
+ if (clusterHealthResponse.timedOut()) {
+ System.err.println("--> Timed out waiting for cluster health");
+ }
+
+ StopWatch stopWatch = new StopWatch().start();
+ long COUNT = SizeValue.parseSizeValue("10m").singles();
+ int BATCH = 100;
+ System.out.println("Indexing [" + COUNT + "] ...");
+ long ITERS = COUNT / BATCH;
+ long i = 1;
+ char character = 'a';
+ int idCounter = 0;
+ for (; i <= ITERS; i++) {
+ int termCounter = 0;
+ BulkRequestBuilder request = client.prepareBulk();
+ for (int j = 0; j < BATCH; j++) {
+ request.add(Requests.indexRequest("test").type("type1").id(Integer.toString(idCounter++)).source(source("prefix" + character + termCounter++)));
+ }
+ character++;
+ BulkResponse response = request.execute().actionGet();
+ if (response.hasFailures()) {
+ System.err.println("failures...");
+ }
+ }
+ System.out.println("Indexing took " + stopWatch.totalTime());
+
+ client.admin().indices().prepareRefresh().execute().actionGet();
+ System.out.println("Count: " + client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().count());
+ } catch (Exception e) {
+ System.out.println("--> Index already exists, ignoring indexing phase, waiting for green");
+ ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet();
+ if (clusterHealthResponse.timedOut()) {
+ System.err.println("--> Timed out waiting for cluster health");
+ }
+ client.admin().indices().prepareRefresh().execute().actionGet();
+ System.out.println("Count: " + client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().count());
+ }
+
+
+ System.out.println("Warming up...");
+ char startChar = 'a';
+ for (int i = 0; i <= 20; i++) {
+ String term = "prefix" + startChar;
+ SearchResponse response = client.prepareSearch()
+ .setQuery(prefixQuery("field", term))
+ .addSuggestion(new SuggestBuilder.FuzzySuggestion("field").setField("field").setText(term).setSuggestMode("always"))
+ .execute().actionGet();
+ if (response.hits().totalHits() == 0) {
+ System.err.println("No hits");
+ continue;
+ }
+ startChar++;
+ }
+
+
+ System.out.println("Starting benchmarking suggestions.");
+ startChar = 'a';
+ long timeTaken = 0;
+ for (int i = 0; i <= SEARCH_ITERS; i++) {
+ String term = "prefix" + startChar;
+ SearchResponse response = client.prepareSearch()
+ .setQuery(matchQuery("field", term))
+ .addSuggestion(new SuggestBuilder.FuzzySuggestion("field").setText(term).setField("field").setSuggestMode("always"))
+ .execute().actionGet();
+ timeTaken += response.tookInMillis();
+ if (response.suggest() == null) {
+ System.err.println("No suggestions");
+ continue;
+ }
+ List suggestedTerms = response.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested();
+ if (suggestedTerms == null || suggestedTerms.isEmpty()) {
+ System.err.println("No suggestions");
+ }
+ startChar++;
+ }
+
+ System.out.println("Avg time taken without filter " + (timeTaken / SEARCH_ITERS));
+
+ client.close();
+ for (Node node : nodes) {
+ node.close();
+ }
+ }
+
+ private static XContentBuilder source(String nameValue) throws IOException {
+ return jsonBuilder().startObject()
+ .field("field", nameValue)
+ .endObject();
+ }
+
+}
diff --git a/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java
new file mode 100644
index 00000000000..578519f2a9e
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java
@@ -0,0 +1,348 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.test.integration.search.suggest;
+
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.test.integration.AbstractNodesTests;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
+import static org.elasticsearch.search.suggest.SuggestBuilder.fuzzySuggestion;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.notNullValue;
+
+/**
+ */
+public class SuggestSearchTests extends AbstractNodesTests {
+
+ private Client client;
+
+    @BeforeClass
+    public void createNodes() throws Exception {
+        // Start a two-node cluster once for the whole class; individual tests
+        // recreate the "test" index themselves.
+        startNode("server1");
+        startNode("server2");
+        client = getClient();
+    }
+
+    @AfterClass
+    public void closeNodes() {
+        // Release the client before tearing down the nodes it talks to.
+        client.close();
+        closeAllNodes();
+    }
+
+    // Client bound to the "server1" node.
+    protected Client getClient() {
+        return client("server1");
+    }
+
+    @Test
+    public void testSimple() throws Exception {
+        // Wipe any leftover index from a previous run; deleting a missing index throws, which is fine.
+        try {
+            client.admin().indices().prepareDelete("test").execute().actionGet();
+        } catch (Exception e) {
+            // ignore
+        }
+        client.admin().indices().prepareCreate("test").execute().actionGet();
+
+        // Index four one-edit variations of "abcd" so the fuzzy suggester has candidates.
+        client.prepareIndex("test", "type1")
+                .setSource(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .field("text", "abcd")
+                        .endObject()
+                )
+                .execute().actionGet();
+        client.prepareIndex("test", "type1")
+                .setSource(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .field("text", "aacd")
+                        .endObject()
+                )
+                .execute().actionGet();
+        client.prepareIndex("test", "type1")
+                .setSource(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .field("text", "abbd")
+                        .endObject()
+                )
+                .execute().actionGet();
+        client.prepareIndex("test", "type1")
+                .setSource(XContentFactory.jsonBuilder()
+                        .startObject()
+                        .field("text", "abcc")
+                        .endObject()
+                )
+                .execute().actionGet();
+        client.admin().indices().prepareRefresh().execute().actionGet();
+
+        // Query plus suggestion in one request.
+        SearchResponse search = client.prepareSearch()
+                .setQuery(matchQuery("text", "spellcecker"))
+                .addSuggestion(
+                        fuzzySuggestion("test").setSuggestMode("always") // Always, otherwise the results can vary between requests.
+                                .setText("abcd")
+                                .setField("text"))
+                .execute().actionGet();
+
+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+        assertThat(search.suggest(), notNullValue());
+        assertThat(search.suggest().getSuggestions().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("test"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getTerm(), equalTo("abcd"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(3));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("aacd"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("abbd"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("abcc"));
+
+        // FIX: this suggest-only response (no query) was previously discarded, so the
+        // assertions below re-verified the first response. Capture it so the
+        // query-less variant is actually tested.
+        search = client.prepareSearch()
+                .addSuggestion(
+                        fuzzySuggestion("test").setSuggestMode("always") // Always, otherwise the results can vary between requests.
+                                .setText("abcd")
+                                .setField("text"))
+                .execute().actionGet();
+
+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+        assertThat(search.suggest(), notNullValue());
+        assertThat(search.suggest().getSuggestions().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("test"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(3));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("aacd"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("abbd"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("abcc"));
+    }
+
+    @Test
+    public void testEmpty() throws Exception {
+        // Recreate an empty index; deleting a missing index throws, which is fine.
+        try {
+            client.admin().indices().prepareDelete("test").execute().actionGet();
+        } catch (Exception e) {
+            // ignore
+        }
+        client.admin().indices().prepareCreate("test").execute().actionGet();
+
+        // With no documents indexed the suggester must still return the requested
+        // entry, just with an empty suggested-terms list.
+        SearchResponse search = client.prepareSearch()
+                .setQuery(matchQuery("text", "spellcecker"))
+                .addSuggestion(
+                        fuzzySuggestion("test").setSuggestMode("always") // Always, otherwise the results can vary between requests.
+                                .setText("abcd")
+                                .setField("text"))
+                .execute().actionGet();
+
+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+        assertThat(search.suggest(), notNullValue());
+        assertThat(search.suggest().getSuggestions().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("test"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getTerm(), equalTo("abcd"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(0));
+
+        // FIX: capture this response; it used to be discarded, so the assertions
+        // below re-verified the previous response instead of this one.
+        search = client.prepareSearch()
+                .addSuggestion(
+                        fuzzySuggestion("test").setSuggestMode("always") // Always, otherwise the results can vary between requests.
+                                .setText("abcd")
+                                .setField("text"))
+                .execute().actionGet();
+
+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+        assertThat(search.suggest(), notNullValue());
+        assertThat(search.suggest().getSuggestions().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("test"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(0));
+    }
+
+ @Test
+ public void testWithMultipleCommands() throws Exception {
+ try {
+ client.admin().indices().prepareDelete("test").execute().actionGet();
+ } catch (Exception e) {
+ // ignore
+ }
+ client.admin().indices().prepareCreate("test").execute().actionGet();
+
+ client.prepareIndex("test", "type1")
+ .setSource(XContentFactory.jsonBuilder()
+ .startObject()
+ .field("field1", "prefix_abcd")
+ .field("field2", "prefix_efgh")
+ .endObject()
+ )
+ .execute().actionGet();
+ client.prepareIndex("test", "type1")
+ .setSource(XContentFactory.jsonBuilder()
+ .startObject()
+ .field("field1", "prefix_aacd")
+ .field("field2", "prefix_eeeh")
+ .endObject()
+ )
+ .execute().actionGet();
+ client.prepareIndex("test", "type1")
+ .setSource(XContentFactory.jsonBuilder()
+ .startObject()
+ .field("field1", "prefix_abbd")
+ .field("field2", "prefix_efff")
+ .endObject()
+ )
+ .execute().actionGet();
+ client.prepareIndex("test", "type1")
+ .setSource(XContentFactory.jsonBuilder()
+ .startObject()
+ .field("field1", "prefix_abcc")
+ .field("field2", "prefix_eggg")
+ .endObject()
+ )
+ .execute().actionGet();
+ client.admin().indices().prepareRefresh().execute().actionGet();
+
+ SearchResponse search = client.prepareSearch()
+ .addSuggestion(fuzzySuggestion("size1")
+ .setSize(1).setText("prefix_abcd").setMaxTermFreq(10).setMinDocFreq(0)
+ .setField("field1").setSuggestMode("always"))
+ .addSuggestion(fuzzySuggestion("field2")
+ .setField("field2").setText("prefix_eeeh prefix_efgh")
+ .setMaxTermFreq(10).setMinDocFreq(0).setSuggestMode("always"))
+ .addSuggestion(fuzzySuggestion("accuracy")
+ .setField("field2").setText("prefix_efgh").setAccuracy(1f)
+ .setMaxTermFreq(10).setMinDocFreq(0).setSuggestMode("always"))
+ .execute().actionGet();
+
+ assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+ assertThat(search.suggest(), notNullValue());
+ assertThat(search.suggest().getSuggestions().size(), equalTo(3));
+ assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("size1"));
+ assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
+ assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(1));
+ assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("prefix_aacd"));
+ assertThat(search.suggest().getSuggestions().get(1).getName(), equalTo("field2"));
+ assertThat(search.suggest().getSuggestions().get(1).getTerms().size(), equalTo(2));
+ assertThat(search.suggest().getSuggestions().get(1).getTerms().get(0).getSuggested().size(), equalTo(1));
+ assertThat(search.suggest().getSuggestions().get(1).getTerms().get(1).getSuggested().size(), equalTo(3));
+ assertThat(search.suggest().getSuggestions().get(1).getTerms().get(1).getSuggested().get(0).getTerm().string(), equalTo("prefix_eeeh"));
+ assertThat(search.suggest().getSuggestions().get(1).getTerms().get(1).getSuggested().get(1).getTerm().string(), equalTo("prefix_efff"));
+ assertThat(search.suggest().getSuggestions().get(1).getTerms().get(1).getSuggested().get(2).getTerm().string(), equalTo("prefix_eggg"));
+ assertThat(search.suggest().getSuggestions().get(2).getName(), equalTo("accuracy"));
+ assertThat(search.suggest().getSuggestions().get(2).getTerms().get(0).getSuggested().isEmpty(), equalTo(true));
+ }
+
+    @Test
+    public void testSizeAndSort() throws Exception {
+        // Fresh index; a delete on a missing index throws and is deliberately ignored.
+        try {
+            client.admin().indices().prepareDelete("test").execute().actionGet();
+        } catch (Exception e) {
+            // ignore
+        }
+        client.admin().indices().prepareCreate("test").execute().actionGet();
+
+        // Term -> number of docs to index with that term: high-frequency terms
+        // first, then three docFreq==1 terms that are closest to "prefix_abcd".
+        // FIX: declare the generic parameters — with the raw Map/Map.Entry types
+        // the `i < entry.getValue()` comparison below does not compile.
+        Map<String, Integer> termsAndDocCount = new HashMap<String, Integer>();
+        termsAndDocCount.put("prefix_aaad", 20);
+        termsAndDocCount.put("prefix_abbb", 18);
+        termsAndDocCount.put("prefix_aaca", 16);
+        termsAndDocCount.put("prefix_abba", 14);
+        termsAndDocCount.put("prefix_accc", 12);
+        termsAndDocCount.put("prefix_addd", 10);
+        termsAndDocCount.put("prefix_abaa", 8);
+        termsAndDocCount.put("prefix_dbca", 6);
+        termsAndDocCount.put("prefix_cbad", 4);
+
+        termsAndDocCount.put("prefix_aacd", 1);
+        termsAndDocCount.put("prefix_abcc", 1);
+        termsAndDocCount.put("prefix_accd", 1);
+
+        for (Map.Entry<String, Integer> entry : termsAndDocCount.entrySet()) {
+            for (int i = 0; i < entry.getValue(); i++) {
+                client.prepareIndex("test", "type1")
+                        .setSource(XContentFactory.jsonBuilder()
+                                .startObject()
+                                .field("field1", entry.getKey())
+                                .endObject()
+                        )
+                        .execute().actionGet();
+            }
+        }
+        client.admin().indices().prepareRefresh().execute().actionGet();
+
+        // Global suggest text shared by all four suggestions; they differ in
+        // size, maxEdits and sort order.
+        SearchResponse search = client.prepareSearch()
+                .setSuggestText("prefix_abcd")
+                .addSuggestion(fuzzySuggestion("size3SortScoreFirst")
+                        .setSize(3).setMinDocFreq(0).setField("field1").setSuggestMode("always"))
+                .addSuggestion(fuzzySuggestion("size10SortScoreFirst")
+                        .setSize(10).setMinDocFreq(0).setField("field1").setSuggestMode("always"))
+                .addSuggestion(fuzzySuggestion("size3SortScoreFirstMaxEdits1")
+                        .setMaxEdits(1)
+                        .setSize(10).setMinDocFreq(0).setField("field1").setSuggestMode("always"))
+                .addSuggestion(fuzzySuggestion("size10SortFrequencyFirst")
+                        .setSize(10).setSort("frequency").setShardSize(1000)
+                        .setMinDocFreq(0).setField("field1").setSuggestMode("always"))
+                .execute().actionGet();
+
+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+        assertThat(search.suggest(), notNullValue());
+        assertThat(search.suggest().getSuggestions().size(), equalTo(4));
+        assertThat(search.suggest().getSuggestions().get(0).getName(), equalTo("size3SortScoreFirst"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().size(), equalTo(3));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("prefix_aacd"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("prefix_abcc"));
+        assertThat(search.suggest().getSuggestions().get(0).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("prefix_accd"));
+
+        assertThat(search.suggest().getSuggestions().get(1).getName(), equalTo("size10SortScoreFirst"));
+        assertThat(search.suggest().getSuggestions().get(1).getTerms().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(0).getSuggested().size(), equalTo(10));
+        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("prefix_aacd"));
+        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("prefix_abcc"));
+        assertThat(search.suggest().getSuggestions().get(1).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("prefix_accd"));
+        // This fails sometimes. Depending on how the docs are sharded. The suggested suggest corrections get the df on shard level, which
+        // isn't correct comparing it to the index level.
+//        assertThat(search.suggest().suggestions().get(1).getSuggestedWords().get("prefix_abcd").get(3).getTerm(), equalTo("prefix_aaad"));
+
+        assertThat(search.suggest().getSuggestions().get(2).getName(), equalTo("size3SortScoreFirstMaxEdits1"));
+        assertThat(search.suggest().getSuggestions().get(2).getTerms().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(2).getTerms().get(0).getSuggested().size(), equalTo(3));
+        assertThat(search.suggest().getSuggestions().get(2).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("prefix_aacd"));
+        assertThat(search.suggest().getSuggestions().get(2).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("prefix_abcc"));
+        assertThat(search.suggest().getSuggestions().get(2).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("prefix_accd"));
+
+        assertThat(search.suggest().getSuggestions().get(3).getName(), equalTo("size10SortFrequencyFirst"));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().size(), equalTo(1));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().size(), equalTo(10));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(0).getTerm().string(), equalTo("prefix_aaad"));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(1).getTerm().string(), equalTo("prefix_abbb"));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(2).getTerm().string(), equalTo("prefix_aaca"));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(3).getTerm().string(), equalTo("prefix_abba"));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(4).getTerm().string(), equalTo("prefix_accc"));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(5).getTerm().string(), equalTo("prefix_addd"));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(6).getTerm().string(), equalTo("prefix_abaa"));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(7).getTerm().string(), equalTo("prefix_dbca"));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(8).getTerm().string(), equalTo("prefix_cbad"));
+        assertThat(search.suggest().getSuggestions().get(3).getTerms().get(0).getSuggested().get(9).getTerm().string(), equalTo("prefix_aacd"));
+//        assertThat(search.suggest().suggestions().get(3).getSuggestedWords().get("prefix_abcd").get(4).getTerm(), equalTo("prefix_abcc"));
+//        assertThat(search.suggest().suggestions().get(3).getSuggestedWords().get("prefix_abcd").get(4).getTerm(), equalTo("prefix_accd"));
+    }
+
+
+}