diff --git a/docs/reference/docs/multi-get.asciidoc b/docs/reference/docs/multi-get.asciidoc index d189a9584cd..86cf112781c 100644 --- a/docs/reference/docs/multi-get.asciidoc +++ b/docs/reference/docs/multi-get.asciidoc @@ -106,7 +106,7 @@ curl 'localhost:9200/_mget' -d '{ "_id" : "3", "_source" : { "include": ["user"], - "_exclude": ["user.location"] + "exclude": ["user.location"] } } ] diff --git a/docs/reference/query-dsl/queries/mlt-query.asciidoc b/docs/reference/query-dsl/queries/mlt-query.asciidoc index 4965b8677fc..98e1147a49f 100644 --- a/docs/reference/query-dsl/queries/mlt-query.asciidoc +++ b/docs/reference/query-dsl/queries/mlt-query.asciidoc @@ -16,6 +16,37 @@ running it against one or more fields. } -------------------------------------------------- +Additionally, More Like This can find documents that are "like" a set of +chosen documents. The syntax to specify one or more documents is similar to +the <>, and supports the `ids` or `docs` array. +If only one document is specified, the query behaves the same as the +<>. + +[source,js] +-------------------------------------------------- +{ + "more_like_this" : { + "fields" : ["name.first", "name.last"], + "docs" : [ + { + "_index" : "test", + "_type" : "type", + "_id" : "1" + }, + { + "_index" : "test", + "_type" : "type", + "_id" : "2" + } + ], + "ids" : ["3", "4"], + "min_term_freq" : 1, + "max_query_terms" : 12 + } +} +-------------------------------------------------- + + `more_like_this` can be shortened to `mlt`. Under the hood, `more_like_this` simply creates multiple `should` clauses in a `bool` query of @@ -31,6 +62,10 @@ terms should be considered as interesting. In order to give more weight to more interesting terms, each boolean clause associated with a term could be boosted by the term tf-idf score times some boosting factor `boost_terms`. +When a search for multiple `docs` is issued, More Like This generates a +`more_like_this` query per document field in `fields`. 
These `fields` are +specified as a top level parameter or within each `doc`. + The `more_like_this` top level parameters include: [cols="<,<",options="header",] @@ -39,7 +74,16 @@ The `more_like_this` top level parameters include: |`fields` |A list of the fields to run the more like this query against. Defaults to the `_all` field. -|`like_text` |The text to find documents like it, *required*. +|`like_text` |The text to find documents like it, *required* if `ids` is +not specified. + +|`ids` or `docs` |A list of documents following the same syntax as the +<>. This parameter is *required* if +`like_text` is not specified. The texts are fetched from `fields` unless +specified in each `doc`, and cannot be set to `_all`. + +|`exclude` |When using `ids`, specifies whether the documents should be +excluded from the search. Defaults to `true`. |`percent_terms_to_match` |The percentage of terms to match on (float value). Defaults to `0.3` (30 percent). diff --git a/src/main/java/org/elasticsearch/action/get/MultiGetRequest.java b/src/main/java/org/elasticsearch/action/get/MultiGetRequest.java index 5cb64f8c304..ad041808277 100644 --- a/src/main/java/org/elasticsearch/action/get/MultiGetRequest.java +++ b/src/main/java/org/elasticsearch/action/get/MultiGetRequest.java @@ -40,6 +40,7 @@ import org.elasticsearch.search.fetch.source.FetchSourceContext; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; @@ -58,7 +59,7 @@ public class MultiGetRequest extends ActionRequest implements I private VersionType versionType = VersionType.INTERNAL; private FetchSourceContext fetchSourceContext; - Item() { + public Item() { } @@ -88,6 +89,11 @@ public class MultiGetRequest extends ActionRequest implements I return this.type; } + public Item type(String type) { + this.type = type; + return this; + } + public String id() { return this.id; } @@ -195,6 +201,39 @@ public class MultiGetRequest extends ActionRequest 
implements I FetchSourceContext.optionalWriteToStream(fetchSourceContext, out); } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof Item)) return false; + + Item item = (Item) o; + + if (version != item.version) return false; + if (fetchSourceContext != null ? !fetchSourceContext.equals(item.fetchSourceContext) : item.fetchSourceContext != null) + return false; + if (!Arrays.equals(fields, item.fields)) return false; + if (!id.equals(item.id)) return false; + if (!index.equals(item.index)) return false; + if (routing != null ? !routing.equals(item.routing) : item.routing != null) return false; + if (type != null ? !type.equals(item.type) : item.type != null) return false; + if (versionType != item.versionType) return false; + + return true; + } + + @Override + public int hashCode() { + int result = index.hashCode(); + result = 31 * result + (type != null ? type.hashCode() : 0); + result = 31 * result + id.hashCode(); + result = 31 * result + (routing != null ? routing.hashCode() : 0); + result = 31 * result + (fields != null ? Arrays.hashCode(fields) : 0); + result = 31 * result + (int) (version ^ (version >>> 32)); + result = 31 * result + versionType.hashCode(); + result = 31 * result + (fetchSourceContext != null ? 
fetchSourceContext.hashCode() : 0); + return result; + } } private boolean listenerThreaded = false; @@ -205,6 +244,10 @@ public class MultiGetRequest extends ActionRequest implements I List items = new ArrayList<>(); + public List getItems() { + return this.items; + } + public MultiGetRequest add(Item item) { items.add(item); return this; @@ -287,115 +330,9 @@ public class MultiGetRequest extends ActionRequest implements I currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.START_ARRAY) { if ("docs".equals(currentFieldName)) { - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (token != XContentParser.Token.START_OBJECT) { - throw new ElasticsearchIllegalArgumentException("docs array element should include an object"); - } - String index = defaultIndex; - String type = defaultType; - String id = null; - String routing = defaultRouting; - String parent = null; - List fields = null; - long version = Versions.MATCH_ANY; - VersionType versionType = VersionType.INTERNAL; - - FetchSourceContext fetchSourceContext = null; - - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - } else if (token.isValue()) { - if ("_index".equals(currentFieldName)) { - if (!allowExplicitIndex) { - throw new ElasticsearchIllegalArgumentException("explicit index in multi get is not allowed"); - } - index = parser.text(); - } else if ("_type".equals(currentFieldName)) { - type = parser.text(); - } else if ("_id".equals(currentFieldName)) { - id = parser.text(); - } else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) { - routing = parser.text(); - } else if ("_parent".equals(currentFieldName) || "parent".equals(currentFieldName)) { - parent = parser.text(); - } else if ("fields".equals(currentFieldName)) { - fields = new ArrayList<>(); - fields.add(parser.text()); - } else if 
("_version".equals(currentFieldName) || "version".equals(currentFieldName)) { - version = parser.longValue(); - } else if ("_version_type".equals(currentFieldName) || "_versionType".equals(currentFieldName) || "version_type".equals(currentFieldName) || "versionType".equals(currentFieldName)) { - versionType = VersionType.fromString(parser.text()); - } else if ("_source".equals(currentFieldName)) { - if (parser.isBooleanValue()) { - fetchSourceContext = new FetchSourceContext(parser.booleanValue()); - } else if (token == XContentParser.Token.VALUE_STRING) { - fetchSourceContext = new FetchSourceContext(new String[]{parser.text()}); - } else { - throw new ElasticsearchParseException("illegal type for _source: [" + token + "]"); - } - } - } else if (token == XContentParser.Token.START_ARRAY) { - if ("fields".equals(currentFieldName)) { - fields = new ArrayList<>(); - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - fields.add(parser.text()); - } - } else if ("_source".equals(currentFieldName)) { - ArrayList includes = new ArrayList<>(); - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - includes.add(parser.text()); - } - fetchSourceContext = new FetchSourceContext(includes.toArray(Strings.EMPTY_ARRAY)); - } - - } else if (token == XContentParser.Token.START_OBJECT) { - if ("_source".equals(currentFieldName)) { - List currentList = null, includes = null, excludes = null; - - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - currentFieldName = parser.currentName(); - if ("includes".equals(currentFieldName) || "include".equals(currentFieldName)) { - currentList = includes != null ? includes : (includes = new ArrayList<>(2)); - } else if ("excludes".equals(currentFieldName) || "exclude".equals(currentFieldName)) { - currentList = excludes != null ? 
excludes : (excludes = new ArrayList<>(2)); - } else { - throw new ElasticsearchParseException("Source definition may not contain " + parser.text()); - } - } else if (token == XContentParser.Token.START_ARRAY) { - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - currentList.add(parser.text()); - } - } else if (token.isValue()) { - currentList.add(parser.text()); - } else { - throw new ElasticsearchParseException("unexpected token while parsing source settings"); - } - } - - fetchSourceContext = new FetchSourceContext( - includes == null ? Strings.EMPTY_ARRAY : includes.toArray(new String[includes.size()]), - excludes == null ? Strings.EMPTY_ARRAY : excludes.toArray(new String[excludes.size()])); - } - } - } - String[] aFields; - if (fields != null) { - aFields = fields.toArray(new String[fields.size()]); - } else { - aFields = defaultFields; - } - add(new Item(index, type, id).routing(routing).fields(aFields).parent(parent).version(version).versionType(versionType) - .fetchSourceContext(fetchSourceContext == null ? 
defaultFetchSource : fetchSourceContext)); - } + parseDocuments(parser, this.items, defaultIndex, defaultType, defaultFields, defaultFetchSource, defaultRouting, allowExplicitIndex); } else if ("ids".equals(currentFieldName)) { - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (!token.isValue()) { - throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids"); - } - add(new Item(defaultIndex, defaultType, parser.text()).fields(defaultFields).fetchSourceContext(defaultFetchSource).routing(defaultRouting)); - } + parseIds(parser, this.items, defaultIndex, defaultType, defaultFields, defaultFetchSource, defaultRouting); } } } @@ -403,6 +340,131 @@ public class MultiGetRequest extends ActionRequest implements I return this; } + public static void parseDocuments(XContentParser parser, List items, @Nullable String defaultIndex, @Nullable String defaultType, @Nullable String[] defaultFields, @Nullable FetchSourceContext defaultFetchSource, @Nullable String defaultRouting, boolean allowExplicitIndex) throws IOException { + String currentFieldName = null; + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token != XContentParser.Token.START_OBJECT) { + throw new ElasticsearchIllegalArgumentException("docs array element should include an object"); + } + String index = defaultIndex; + String type = defaultType; + String id = null; + String routing = defaultRouting; + String parent = null; + List fields = null; + long version = Versions.MATCH_ANY; + VersionType versionType = VersionType.INTERNAL; + + FetchSourceContext fetchSourceContext = null; + + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + if ("_index".equals(currentFieldName)) { + if (!allowExplicitIndex) { + throw new 
ElasticsearchIllegalArgumentException("explicit index in multi get is not allowed"); + } + index = parser.text(); + } else if ("_type".equals(currentFieldName)) { + type = parser.text(); + } else if ("_id".equals(currentFieldName)) { + id = parser.text(); + } else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) { + routing = parser.text(); + } else if ("_parent".equals(currentFieldName) || "parent".equals(currentFieldName)) { + parent = parser.text(); + } else if ("fields".equals(currentFieldName)) { + fields = new ArrayList<>(); + fields.add(parser.text()); + } else if ("_version".equals(currentFieldName) || "version".equals(currentFieldName)) { + version = parser.longValue(); + } else if ("_version_type".equals(currentFieldName) || "_versionType".equals(currentFieldName) || "version_type".equals(currentFieldName) || "versionType".equals(currentFieldName)) { + versionType = VersionType.fromString(parser.text()); + } else if ("_source".equals(currentFieldName)) { + if (parser.isBooleanValue()) { + fetchSourceContext = new FetchSourceContext(parser.booleanValue()); + } else if (token == XContentParser.Token.VALUE_STRING) { + fetchSourceContext = new FetchSourceContext(new String[]{parser.text()}); + } else { + throw new ElasticsearchParseException("illegal type for _source: [" + token + "]"); + } + } + } else if (token == XContentParser.Token.START_ARRAY) { + if ("fields".equals(currentFieldName)) { + fields = new ArrayList<>(); + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + fields.add(parser.text()); + } + } else if ("_source".equals(currentFieldName)) { + ArrayList includes = new ArrayList<>(); + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + includes.add(parser.text()); + } + fetchSourceContext = new FetchSourceContext(includes.toArray(Strings.EMPTY_ARRAY)); + } + + } else if (token == XContentParser.Token.START_OBJECT) { + if ("_source".equals(currentFieldName)) { + List 
currentList = null, includes = null, excludes = null; + + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + if ("includes".equals(currentFieldName) || "include".equals(currentFieldName)) { + currentList = includes != null ? includes : (includes = new ArrayList<>(2)); + } else if ("excludes".equals(currentFieldName) || "exclude".equals(currentFieldName)) { + currentList = excludes != null ? excludes : (excludes = new ArrayList<>(2)); + } else { + throw new ElasticsearchParseException("Source definition may not contain " + parser.text()); + } + } else if (token == XContentParser.Token.START_ARRAY) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + currentList.add(parser.text()); + } + } else if (token.isValue()) { + currentList.add(parser.text()); + } else { + throw new ElasticsearchParseException("unexpected token while parsing source settings"); + } + } + + fetchSourceContext = new FetchSourceContext( + includes == null ? Strings.EMPTY_ARRAY : includes.toArray(new String[includes.size()]), + excludes == null ? Strings.EMPTY_ARRAY : excludes.toArray(new String[excludes.size()])); + } + } + } + String[] aFields; + if (fields != null) { + aFields = fields.toArray(new String[fields.size()]); + } else { + aFields = defaultFields; + } + items.add(new Item(index, type, id).routing(routing).fields(aFields).parent(parent).version(version).versionType(versionType) + .fetchSourceContext(fetchSourceContext == null ? 
defaultFetchSource : fetchSourceContext)); + } + } + + public static void parseDocuments(XContentParser parser, List items) throws IOException { + parseDocuments(parser, items, null, null, null, null, null, true); + } + + public static void parseIds(XContentParser parser, List items, @Nullable String defaultIndex, @Nullable String defaultType, @Nullable String[] defaultFields, @Nullable FetchSourceContext defaultFetchSource, @Nullable String defaultRouting) throws IOException { + XContentParser.Token token; + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (!token.isValue()) { + throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids"); + } + items.add(new Item(defaultIndex, defaultType, parser.text()).fields(defaultFields).fetchSourceContext(defaultFetchSource).routing(defaultRouting)); + } + } + + public static void parseIds(XContentParser parser, List items) throws IOException { + parseIds(parser, items, null, null, null, null, null); + } + @Override public Iterator iterator() { return Iterators.unmodifiableIterator(items.iterator()); diff --git a/src/main/java/org/elasticsearch/action/mlt/TransportMoreLikeThisAction.java b/src/main/java/org/elasticsearch/action/mlt/TransportMoreLikeThisAction.java index f0673de35ed..94365793a0d 100644 --- a/src/main/java/org/elasticsearch/action/mlt/TransportMoreLikeThisAction.java +++ b/src/main/java/org/elasticsearch/action/mlt/TransportMoreLikeThisAction.java @@ -35,7 +35,9 @@ import org.elasticsearch.action.support.TransportAction; import org.elasticsearch.cluster.ClusterService; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.routing.*; +import org.elasticsearch.cluster.routing.MutableShardRouting; +import org.elasticsearch.cluster.routing.ShardIterator; +import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.common.inject.Inject; import 
org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.engine.DocumentMissingException; diff --git a/src/main/java/org/elasticsearch/index/mapper/Uid.java b/src/main/java/org/elasticsearch/index/mapper/Uid.java index de4d088a9f2..743b29650d2 100644 --- a/src/main/java/org/elasticsearch/index/mapper/Uid.java +++ b/src/main/java/org/elasticsearch/index/mapper/Uid.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; +import org.elasticsearch.action.get.MultiGetRequest; import org.elasticsearch.common.lucene.BytesRefs; import java.util.Collection; @@ -94,6 +95,15 @@ public final class Uid { return new Uid(uid.substring(0, delimiterIndex), uid.substring(delimiterIndex + 1)); } + public static BytesRef[] createUids(List items) { + BytesRef[] uids = new BytesRef[items.size()]; + int idx = 0; + for (MultiGetRequest.Item item : items) { + uids[idx++] = createUidAsBytes(item); + } + return uids; + } + public static BytesRef createUidAsBytes(String type, String id) { return createUidAsBytes(new BytesRef(type), new BytesRef(id)); } @@ -102,6 +112,10 @@ public final class Uid { return createUidAsBytes(new BytesRef(type), id); } + public static BytesRef createUidAsBytes(MultiGetRequest.Item item) { + return createUidAsBytes(item.type(), item.id()); + } + public static BytesRef createUidAsBytes(BytesRef type, BytesRef id) { final BytesRef ref = new BytesRef(type.length + 1 + id.length); System.arraycopy(type.bytes, type.offset, ref.bytes, 0, type.length); diff --git a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java index dc205cffef0..ef9b4ff136b 100644 --- a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java @@ -20,9 +20,19 @@ package org.elasticsearch.index.query; 
import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.action.get.MultiGetRequest; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.lucene.uid.Versions; +import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.VersionType; +import org.elasticsearch.search.fetch.source.FetchSourceContext; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; /** * A more like this query that finds documents that are "like" the provided {@link #likeText(String)} @@ -30,9 +40,69 @@ import java.io.IOException; */ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements BoostableQueryBuilder { + /** + * A single get item. Pure delegate to multi get. + */ + public static final class Item extends MultiGetRequest.Item implements ToXContent { + public Item() { + super(); + } + + public Item(String index, @Nullable String type, String id) { + super(index, type, id); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + if (this.index() != null) { + builder.field("_index", this.index()); + } + if (this.id() != null) { + builder.field("_id", this.id()); + } + if (this.type() != null) { + builder.field("_type", this.type()); + } + if (this.fields() != null) { + builder.array("fields", this.fields()); + } + if (this.routing() != null) { + builder.field("_routing", this.routing()); + } + if (this.fetchSourceContext() != null) { + FetchSourceContext source = this.fetchSourceContext(); + String[] includes = source.includes(); + String[] excludes = source.excludes(); + if (includes.length == 0 && excludes.length == 0) { + builder.field("_source", source.fetchSource()); + } else if (includes.length > 0 && excludes.length == 0) { + builder.array("_source", 
source.includes()); + } else if (excludes.length > 0) { + builder.startObject("_source"); + if (includes.length > 0) { + builder.array("includes", source.includes()); + } + builder.array("excludes", source.excludes()); + builder.endObject(); + } + } + if (this.version() != Versions.MATCH_ANY) { + builder.field("_version", this.version()); + } + if (this.versionType() != VersionType.INTERNAL) { + builder.field("_version_type", this.versionType().toString().toLowerCase(Locale.ROOT)); + } + return builder.endObject(); + } + } + private final String[] fields; private String likeText; + private List ids = new ArrayList<>(); + private List docs = new ArrayList<>(); + private Boolean exclude = null; private float percentTermsToMatch = -1; private int minTermFreq = -1; private int maxQueryTerms = -1; @@ -71,6 +141,26 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta return this; } + public MoreLikeThisQueryBuilder ids(String... ids) { + this.ids = Arrays.asList(ids); + return this; + } + + public MoreLikeThisQueryBuilder docs(Item... docs) { + this.docs = Arrays.asList(docs); + return this; + } + + public MoreLikeThisQueryBuilder addItem(Item item) { + this.docs.add(item); + return this; + } + + public MoreLikeThisQueryBuilder exclude(boolean exclude) { + this.exclude = exclude; + return this; + } + /** * The percentage of terms to match. Defaults to 0.3. 
*/ @@ -192,9 +282,9 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta } builder.endArray(); } - if (likeText == null) { - throw new ElasticsearchIllegalArgumentException("moreLikeThis requires '"+ - MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' to be provided"); + if (likeText == null && this.docs.isEmpty() && this.ids.isEmpty()) { + throw new ElasticsearchIllegalArgumentException("more_like_this requires either '"+ + MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' or 'docs/ids' to be provided"); } builder.field(MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName(), likeText); if (percentTermsToMatch != -1) { @@ -240,6 +330,15 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta if (queryName != null) { builder.field("_name", queryName); } + if (!ids.isEmpty()) { + builder.array("ids", ids.toArray()); + } + if (!docs.isEmpty()) { + builder.array("docs", docs.toArray()); + } + if (exclude != null) { + builder.field("exclude", exclude); + } builder.endObject(); } } diff --git a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java index 5385a91c507..fa920c54acc 100644 --- a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java @@ -22,19 +22,26 @@ package org.elasticsearch.index.query; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queries.TermsFilter; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.action.get.MultiGetRequest; +import 
org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.MoreLikeThisQuery; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.Analysis; +import org.elasticsearch.index.mapper.Uid; +import org.elasticsearch.index.mapper.internal.UidFieldMapper; +import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService; import java.io.IOException; -import java.util.Iterator; -import java.util.List; -import java.util.Set; +import java.util.*; /** * @@ -42,8 +49,8 @@ import java.util.Set; public class MoreLikeThisQueryParser implements QueryParser { public static final String NAME = "mlt"; - - + private MoreLikeThisFetchService fetchService = null; + public static class Fields { public static final ParseField LIKE_TEXT = new ParseField("like_text"); public static final ParseField MIN_TERM_FREQ = new ParseField("min_term_freq"); @@ -56,10 +63,18 @@ public class MoreLikeThisQueryParser implements QueryParser { public static final ParseField PERCENT_TERMS_TO_MATCH = new ParseField("percent_terms_to_match"); public static final ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field"); public static final ParseField STOP_WORDS = new ParseField("stop_words"); - } + public static final ParseField DOCUMENT_IDS = new ParseField("ids"); + public static final ParseField DOCUMENTS = new ParseField("docs"); + public static final ParseField EXCLUDE = new ParseField("exclude"); + } - @Inject public MoreLikeThisQueryParser() { + + } + + @Inject(optional = true) + public void setFetchService(@Nullable MoreLikeThisFetchService fetchService) { + this.fetchService = fetchService; } @Override @@ -77,9 +92,11 @@ public class MoreLikeThisQueryParser implements QueryParser { List moreLikeFields = null; boolean failOnUnsupportedField = true; String queryName = null; 
+ boolean exclude = true; XContentParser.Token token; String currentFieldName = null; + List items = new ArrayList(); while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); @@ -114,10 +131,12 @@ public class MoreLikeThisQueryParser implements QueryParser { failOnUnsupportedField = parser.booleanValue(); } else if ("_name".equals(currentFieldName)) { queryName = parser.text(); + } else if (Fields.EXCLUDE.match(currentFieldName, parseContext.parseFlags())) { + exclude = parser.booleanValue(); } else { throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]"); } - } else if (token == XContentParser.Token.START_ARRAY) { + } else if (token == XContentParser.Token.START_ARRAY) { if (Fields.STOP_WORDS.match(currentFieldName, parseContext.parseFlags())) { Set stopWords = Sets.newHashSet(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { @@ -129,14 +148,18 @@ public class MoreLikeThisQueryParser implements QueryParser { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { moreLikeFields.add(parseContext.indexName(parser.text())); } + } else if (Fields.DOCUMENT_IDS.match(currentFieldName, parseContext.parseFlags())) { + MultiGetRequest.parseIds(parser, items); + } else if (Fields.DOCUMENTS.match(currentFieldName, parseContext.parseFlags())) { + MultiGetRequest.parseDocuments(parser, items); } else { throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]"); } } } - if (mltQuery.getLikeText() == null) { - throw new QueryParsingException(parseContext.index(), "more_like_this requires 'like_text' to be specified"); + if ((mltQuery.getLikeText() == null && items.isEmpty()) || (mltQuery.getLikeText() != null && !items.isEmpty())) { + throw new QueryParsingException(parseContext.index(), "more_like_this requires either 
'like_text' or 'ids/docs' to be specified"); } if (analyzer == null) { @@ -150,6 +173,75 @@ public class MoreLikeThisQueryParser implements QueryParser { throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be non-empty"); } + removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField); + if (moreLikeFields.isEmpty()) { + return null; + } + mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY)); + + if (queryName != null) { + parseContext.addNamedQuery(queryName, mltQuery); + } + + if (!items.isEmpty()) { + // set default index, type and fields if not specified + for (MultiGetRequest.Item item : items) { + if (item.index() == null) { + item.index(parseContext.index().name()); + } + if (item.type() == null) { + if (parseContext.queryTypes().size() > 1) { + throw new QueryParsingException(parseContext.index(), + "ambiguous type for item with id: " + item.id() + " and index: " + item.index()); + } else { + item.type(parseContext.queryTypes().iterator().next()); + } + } + if (item.fields() == null && item.fetchSourceContext() == null) { + item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()])); + } else { + // TODO how about fields content fetched from _source? 
+ removeUnsupportedFields(item, analyzer, failOnUnsupportedField); + } + } + // fetching the items with multi-get + List likeTexts = fetchService.fetch(items); + // right now we are just building a boolean query + BooleanQuery boolQuery = new BooleanQuery(); + for (MoreLikeThisFetchService.LikeText likeText : likeTexts) { + addMoreLikeThis(boolQuery, mltQuery, likeText.field, likeText.text); + } + // exclude the items from the search + if (exclude) { + TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, Uid.createUids(items)); + ConstantScoreQuery query = new ConstantScoreQuery(filter); + boolQuery.add(query, BooleanClause.Occur.MUST_NOT); + } + return boolQuery; + } + + return mltQuery; + } + + private void addMoreLikeThis(BooleanQuery boolQuery, MoreLikeThisQuery mltQuery, String fieldName, String likeText) { + MoreLikeThisQuery mlt = new MoreLikeThisQuery(); + mlt.setMoreLikeFields(new String[] {fieldName}); + mlt.setLikeText(likeText); + mlt.setAnalyzer(mltQuery.getAnalyzer()); + mlt.setPercentTermsToMatch(mltQuery.getPercentTermsToMatch()); + mlt.setBoostTerms(mltQuery.isBoostTerms()); + mlt.setBoostTermsFactor(mltQuery.getBoostTermsFactor()); + mlt.setMinDocFreq(mltQuery.getMinDocFreq()); + mlt.setMaxDocFreq(mltQuery.getMaxDocFreq()); + mlt.setMinWordLen(mltQuery.getMinWordLen()); + mlt.setMaxWordLen(mltQuery.getMaxWordLen()); + mlt.setMinTermFrequency(mltQuery.getMinTermFrequency()); + mlt.setMaxQueryTerms(mltQuery.getMaxQueryTerms()); + mlt.setStopWords(mltQuery.getStopWords()); + boolQuery.add(mlt, BooleanClause.Occur.SHOULD); + } + + private List removeUnsupportedFields(List moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException { for (Iterator it = moreLikeFields.iterator(); it.hasNext(); ) { final String fieldName = it.next(); if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) { @@ -160,13 +252,11 @@ public class MoreLikeThisQueryParser implements QueryParser { } } } - if (moreLikeFields.isEmpty()) { - 
return null; - } - mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY)); - if (queryName != null) { - parseContext.addNamedQuery(queryName, mltQuery); - } - return mltQuery; + return moreLikeFields; } + + private void removeUnsupportedFields(MultiGetRequest.Item item, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException { + item.fields(removeUnsupportedFields(new ArrayList<>(Arrays.asList(item.fields())), analyzer, failOnUnsupportedField).toArray(Strings.EMPTY_ARRAY)); + } + } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/search/morelikethis/MoreLikeThisFetchService.java b/src/main/java/org/elasticsearch/index/search/morelikethis/MoreLikeThisFetchService.java new file mode 100644 index 00000000000..3763c225d28 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/search/morelikethis/MoreLikeThisFetchService.java @@ -0,0 +1,83 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.search.morelikethis; + +import org.elasticsearch.action.get.GetResponse; +import org.elasticsearch.action.get.MultiGetItemResponse; +import org.elasticsearch.action.get.MultiGetRequest; +import org.elasticsearch.action.get.MultiGetResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.get.GetField; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * + */ +public class MoreLikeThisFetchService extends AbstractComponent { + + public static final class LikeText { + public final String field; + public final String text; + + public LikeText(String field, String text) { + this.field = field; + this.text = text; + } + } + + private final Client client; + + @Inject + public MoreLikeThisFetchService(Client client, Settings settings) { + super(settings); + this.client = client; + } + + public List fetch(List items) throws IOException { + MultiGetRequest request = new MultiGetRequest(); + for (MultiGetRequest.Item item : items) { + request.add(item); + } + MultiGetResponse responses = client.multiGet(request).actionGet(); + List likeTexts = new ArrayList<>(); + for (MultiGetItemResponse response : responses) { + if (response.isFailed()) { + continue; + } + GetResponse getResponse = response.getResponse(); + if (!getResponse.isExists()) { + continue; + } + + for (GetField getField : getResponse.getFields().values()) { + for (Object value : getField.getValues()) { + likeTexts.add(new LikeText(getField.getName(), value.toString())); + } + } + } + return likeTexts; + } +} diff --git a/src/main/java/org/elasticsearch/search/SearchModule.java b/src/main/java/org/elasticsearch/search/SearchModule.java index 1092e0e842e..434cec2ef31 100644 --- a/src/main/java/org/elasticsearch/search/SearchModule.java +++ 
b/src/main/java/org/elasticsearch/search/SearchModule.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.inject.AbstractModule; import org.elasticsearch.common.inject.Module; import org.elasticsearch.common.inject.SpawnModules; import org.elasticsearch.index.query.functionscore.FunctionScoreModule; +import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService; import org.elasticsearch.search.action.SearchServiceTransportAction; import org.elasticsearch.search.aggregations.AggregationModule; import org.elasticsearch.search.controller.SearchPhaseController; @@ -70,5 +71,6 @@ public class SearchModule extends AbstractModule implements SpawnModules { bind(HighlightPhase.class).asEagerSingleton(); bind(SearchServiceTransportAction.class).asEagerSingleton(); + bind(MoreLikeThisFetchService.class).asEagerSingleton(); } } diff --git a/src/main/java/org/elasticsearch/search/fetch/source/FetchSourceContext.java b/src/main/java/org/elasticsearch/search/fetch/source/FetchSourceContext.java index 2fa7314c123..534b5dda60f 100644 --- a/src/main/java/org/elasticsearch/search/fetch/source/FetchSourceContext.java +++ b/src/main/java/org/elasticsearch/search/fetch/source/FetchSourceContext.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.io.stream.Streamable; import org.elasticsearch.rest.RestRequest; import java.io.IOException; +import java.util.Arrays; /** */ @@ -162,4 +163,26 @@ public class FetchSourceContext implements Streamable { out.writeStringArray(includes); out.writeStringArray(excludes); } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + FetchSourceContext that = (FetchSourceContext) o; + + if (fetchSource != that.fetchSource) return false; + if (!Arrays.equals(excludes, that.excludes)) return false; + if (!Arrays.equals(includes, that.includes)) return false; + + return true; + } + + @Override + public int hashCode() { + int result = (fetchSource ? 
1 : 0); + result = 31 * result + (includes != null ? Arrays.hashCode(includes) : 0); + result = 31 * result + (excludes != null ? Arrays.hashCode(excludes) : 0); + return result; + } } diff --git a/src/test/java/org/elasticsearch/index/query/ItemSerializationTests.java b/src/test/java/org/elasticsearch/index/query/ItemSerializationTests.java new file mode 100644 index 00000000000..765ce4585ee --- /dev/null +++ b/src/test/java/org/elasticsearch/index/query/ItemSerializationTests.java @@ -0,0 +1,168 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import com.carrotsearch.randomizedtesting.generators.RandomPicks; +import org.elasticsearch.action.get.MultiGetRequest; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.index.VersionType; +import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item; +import org.elasticsearch.search.fetch.source.FetchSourceContext; +import org.elasticsearch.test.ElasticsearchTestCase; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Random; + +import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath; +import static org.hamcrest.Matchers.is; + +public class ItemSerializationTests extends ElasticsearchTestCase { + + private String[] generateRandomStringArray(int arraySize, int stringSize) { + String[] array = randomBoolean() ? new String[randomInt(arraySize)] : null; // allow empty arrays + if (array != null) { + for (int i = 0; i < array.length; i++) { + array[i] = randomAsciiOfLength(stringSize); + } + } + return array; + } + + private Item generateRandomItem(int arraySize, int stringSize) { + String index = randomAsciiOfLength(stringSize); + String type = randomAsciiOfLength(stringSize); + String id = String.valueOf(Math.abs(randomInt())); + String routing = randomBoolean() ? 
randomAsciiOfLength(stringSize) : null; + String[] fields = generateRandomStringArray(arraySize, stringSize); + + long version = Math.abs(randomLong()); + VersionType versionType = RandomPicks.randomFrom(new Random(), VersionType.values()); + + FetchSourceContext fetchSourceContext; + switch (randomIntBetween(0, 3)) { + case 0 : + fetchSourceContext = new FetchSourceContext(randomBoolean()); + break; + case 1 : + fetchSourceContext = new FetchSourceContext(generateRandomStringArray(arraySize, stringSize)); + break; + case 2 : + fetchSourceContext = new FetchSourceContext(generateRandomStringArray(arraySize, stringSize), + generateRandomStringArray(arraySize, stringSize)); + break; + default: + fetchSourceContext = null; + break; + } + return (Item) new Item(index, type, id).routing(routing).fields(fields).version(version).versionType(versionType) + .fetchSourceContext(fetchSourceContext); + } + + private String ItemToJSON(Item item) throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.startObject(); + builder.startArray("docs"); + item.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endArray(); + builder.endObject(); + return XContentHelper.convertToJson(builder.bytes(), false); + } + + private MultiGetRequest.Item JSONtoItem(String json) throws Exception { + MultiGetRequest request = new MultiGetRequest().add(null, null, null, null, new BytesArray(json), true); + return request.getItems().get(0); + } + + @Test + public void testItemSerialization() throws Exception { + int numOfTrials = 100; + int maxArraySize = 7; + int maxStringSize = 8; + for (int i = 0; i < numOfTrials; i++) { + Item item1 = generateRandomItem(maxArraySize, maxStringSize); + String json = ItemToJSON(item1); + MultiGetRequest.Item item2 = JSONtoItem(json); + assertEquals(item1, item2); + } + } + + private List testItemsFromJSON(String json) throws Exception { + MultiGetRequest request = new MultiGetRequest(); + request.add(null, null, null, null, new 
BytesArray(json), true); + List items = request.getItems(); + + assertEquals(items.size(), 3); + for (MultiGetRequest.Item item : items) { + assertThat(item.index(), is("test")); + assertThat(item.type(), is("type")); + FetchSourceContext fetchSource = item.fetchSourceContext(); + switch (item.id()) { + case "1" : + assertThat(fetchSource.fetchSource(), is(false)); + break; + case "2" : + assertThat(fetchSource.fetchSource(), is(true)); + assertThat(fetchSource.includes(), is(new String[]{"field3", "field4"})); + break; + case "3" : + assertThat(fetchSource.fetchSource(), is(true)); + assertThat(fetchSource.includes(), is(new String[]{"user"})); + assertThat(fetchSource.excludes(), is(new String[]{"user.location"})); + break; + default: + fail("item with id: " + item.id() + " is not 1, 2 or 3"); + break; + } + } + return items; + } + + @Test + public void testSimpleItemSerializationFromFile() throws Exception { + // test items from JSON + List itemsFromJSON = testItemsFromJSON( + copyToStringFromClasspath("/org/elasticsearch/index/query/items.json")); + + // create builder from items + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.startObject(); + builder.startArray("docs"); + for (MultiGetRequest.Item item : itemsFromJSON) { + MoreLikeThisQueryBuilder.Item itemForBuilder = (MoreLikeThisQueryBuilder.Item) new MoreLikeThisQueryBuilder.Item( + item.index(), item.type(), item.id()) + .fetchSourceContext(item.fetchSourceContext()) + .fields(item.fields()); + itemForBuilder.toXContent(builder, ToXContent.EMPTY_PARAMS); + } + builder.endArray(); + builder.endObject(); + + // verify generated JSON lead to the same items + String json = XContentHelper.convertToJson(builder.bytes(), false); + testItemsFromJSON(json); + } + +} diff --git a/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java index eb3bea3f603..81cf441d997 100644 --- 
a/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java @@ -30,6 +30,7 @@ import org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.get.MultiGetRequest; import org.elasticsearch.cache.recycler.CacheRecyclerModule; import org.elasticsearch.cluster.ClusterService; import org.elasticsearch.common.bytes.BytesArray; @@ -63,6 +64,7 @@ import org.elasticsearch.index.search.NumericRangeFieldDataFilter; import org.elasticsearch.index.search.geo.GeoDistanceFilter; import org.elasticsearch.index.search.geo.GeoPolygonFilter; import org.elasticsearch.index.search.geo.InMemoryGeoBoundingBoxFilter; +import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService; import org.elasticsearch.index.settings.IndexSettingsModule; import org.elasticsearch.index.similarity.SimilarityModule; import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService; @@ -80,6 +82,7 @@ import org.junit.Test; import java.io.IOException; import java.lang.reflect.Field; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -137,6 +140,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase { String mapping = copyToStringFromClasspath("/org/elasticsearch/index/query/mapping.json"); injector.getInstance(MapperService.class).merge("person", new CompressedString(mapping), true); injector.getInstance(MapperService.class).documentMapper("person").parse(new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/index/query/data.json"))); + queryParser = injector.getInstance(IndexQueryParserService.class); } @@ -1671,6 +1675,58 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase { assertThat(mltQuery.getMaxQueryTerms(), equalTo(12)); } + @Test + 
public void testMoreLikeThisIds() throws Exception { + MoreLikeThisQueryParser parser = (MoreLikeThisQueryParser) queryParser.queryParser("more_like_this"); + parser.setFetchService(new MockMoreLikeThisFetchService()); + + List likeTexts = new ArrayList<>(); + String index = "test"; + String type = "person"; + for (int i = 1; i < 5; i++) { + for (String field : new String[]{"name.first", "name.last"}) { + MoreLikeThisFetchService.LikeText likeText = new MoreLikeThisFetchService.LikeText( + field, index + " " + type + " " + i + " " + field); + likeTexts.add(likeText); + } + } + + IndexQueryParserService queryParser = queryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/index/query/mlt-ids.json"); + Query parsedQuery = queryParser.parse(query).query(); + assertThat(parsedQuery, instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) parsedQuery; + assertThat(booleanQuery.getClauses().length, is(likeTexts.size())); + + for (int i=0; i fetch(List items) throws IOException { + List likeTexts = new ArrayList<>(); + for (MultiGetRequest.Item item: items) { + for (String field : item.fields()) { + LikeText likeText = new LikeText( + field, item.index() + " " + item.type() + " " + item.id() + " " + field); + likeTexts.add(likeText); + } + } + return likeTexts; + } + } + @Test public void testFuzzyLikeThisBuilder() throws Exception { IndexQueryParserService queryParser = queryParser(); diff --git a/src/test/java/org/elasticsearch/index/query/items.json b/src/test/java/org/elasticsearch/index/query/items.json new file mode 100644 index 00000000000..dc56fc38416 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/query/items.json @@ -0,0 +1,25 @@ +{ + "docs" : [ + { + "_index" : "test", + "_type" : "type", + "_id" : "1", + "_source" : false + }, + { + "_index" : "test", + "_type" : "type", + "_id" : "2", + "_source" : ["field3", "field4"] + }, + { + "_index" : "test", + "_type" : "type", + "_id" : "3", + "_source" : { + 
"include": ["user"], + "exclude": ["user.location"] + } + } + ] +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/index/query/mlt-ids.json b/src/test/java/org/elasticsearch/index/query/mlt-ids.json new file mode 100644 index 00000000000..ad7960a95b9 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/query/mlt-ids.json @@ -0,0 +1,21 @@ +{ + more_like_this:{ + "fields" : ["name.first", "name.last"], + "docs" : [ + { + "_index" : "test", + "_type" : "person", + "_id" : "1" + }, + { + "_index" : "test", + "_type" : "person", + "_id" : "2" + } + ], + "ids" : ["3", "4"], + "exclude" : false, + "min_term_freq" : 1, + "max_query_terms" : 12 + } +} diff --git a/src/test/java/org/elasticsearch/mlt/MoreLikeThisActionTests.java b/src/test/java/org/elasticsearch/mlt/MoreLikeThisActionTests.java index 32fe084b820..19c577fd1d2 100644 --- a/src/test/java/org/elasticsearch/mlt/MoreLikeThisActionTests.java +++ b/src/test/java/org/elasticsearch/mlt/MoreLikeThisActionTests.java @@ -20,12 +20,18 @@ package org.elasticsearch.mlt; import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus; +import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.action.mlt.MoreLikeThisRequest; import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.SearchType; import org.elasticsearch.client.Client; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.query.MoreLikeThisQueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.junit.Test; @@ -341,4 +347,118 @@ public 
class MoreLikeThisActionTests extends ElasticsearchIntegrationTest { assertEquals(mltResponse.getHits().hits().length, 8); } + + @Test + public void testSimpleMoreLikeThisIds() throws Exception { + logger.info("Creating index test"); + assertAcked(prepareCreate("test").addMapping("type1", + jsonBuilder().startObject().startObject("type1").startObject("properties") + .startObject("text").field("type", "string").endObject() + .endObject().endObject().endObject())); + + logger.info("Running Cluster Health"); + assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); + + logger.info("Indexing..."); + List builders = new ArrayList<>(); + builders.add(client().prepareIndex("test", "type1").setSource("text", "lucene").setId("1")); + builders.add(client().prepareIndex("test", "type1").setSource("text", "lucene release").setId("2")); + builders.add(client().prepareIndex("test", "type1").setSource("text", "apache lucene").setId("3")); + indexRandom(true, builders); + + logger.info("Running MoreLikeThis"); + MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").ids("1").exclude(false).minTermFreq(1).minDocFreq(1); + SearchResponse mltResponse = client().prepareSearch().setTypes("type1").setQuery(queryBuilder).execute().actionGet(); + assertHitCount(mltResponse, 3l); + } + + @Test + public void testCompareMoreLikeThisDSLWithAPI() throws Exception { + logger.info("Creating index test"); + assertAcked(prepareCreate("test").addMapping("type1", + jsonBuilder().startObject().startObject("type1").startObject("properties") + .startObject("text").field("type", "string").endObject() + .endObject().endObject().endObject())); + + logger.info("Running Cluster Health"); + assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); + + logger.info("Indexing..."); + String[] texts = new String[] { + "Apache Lucene", + "free and open source", + "information retrieval", + "software library", + "programmed in Java", + "Doug Cutting", + "Apache Software Foundation", + 
"Apache Software License", + "Lucene programming languages", + "Delphi, Perl, C#, C++, Python, Ruby, and PHP" + }; + List builders = new ArrayList<>(10); + for (int i = 0; i < texts.length; i++) { + builders.add(client().prepareIndex("test", "type1").setSource("text", texts[i]).setId(String.valueOf(i))); + } + indexRandom(true, builders); + + logger.info("Running MoreLikeThis DSL with IDs"); + Client client = client(); + MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").ids("0").minTermFreq(1).minDocFreq(1); + SearchResponse mltResponseDSL = client.prepareSearch() + .setSearchType(SearchType.QUERY_THEN_FETCH) + .setTypes("type1") + .setQuery(queryBuilder) + .execute().actionGet(); + assertSearchResponse(mltResponseDSL); + + logger.info("Running MoreLikeThis API"); + MoreLikeThisRequest mltRequest = moreLikeThisRequest("test").type("type1").id("0").minTermFreq(1).minDocFreq(1); + SearchResponse mltResponseAPI = client.moreLikeThis(mltRequest).actionGet(); + assertSearchResponse(mltResponseAPI); + + logger.info("Ensure the documents and scores returned are the same."); + SearchHit[] hitsDSL = mltResponseDSL.getHits().hits(); + SearchHit[] hitsAPI = mltResponseAPI.getHits().hits(); + assertThat("Not the same number of results.", hitsAPI.length, equalTo(hitsDSL.length)); + for (int i = 0; i < hitsDSL.length; i++) { + assertThat("Expected id: " + hitsDSL[i].getId() + " at position " + i + " but wasn't.", + hitsAPI[i].getId(), equalTo(hitsDSL[i].getId())); + assertThat("Expected score: " + hitsDSL[i].getScore() + " at position " + i + " but wasn't.", + hitsAPI[i].getScore(), equalTo(hitsDSL[i].getScore())); + } + } + + @Test + public void testSimpleMoreLikeThisIdsMultipleTypes() throws Exception { + logger.info("Creating index test"); + int numOfTypes = randomIntBetween(2, 10); + CreateIndexRequestBuilder createRequestBuilder = prepareCreate("test"); + for (int i = 0; i < numOfTypes; i++) { + createRequestBuilder.addMapping("type" + i, 
jsonBuilder().startObject().startObject("type" + i).startObject("properties") + .startObject("text").field("type", "string").endObject() + .endObject().endObject().endObject()); + } + assertAcked(createRequestBuilder); + + logger.info("Running Cluster Health"); + assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); + + logger.info("Indexing..."); + List builders = new ArrayList<>(numOfTypes); + for (int i = 0; i < numOfTypes; i++) { + builders.add(client().prepareIndex("test", "type" + i).setSource("text", "lucene" + " " + i).setId(String.valueOf(i))); + } + indexRandom(true, builders); + + logger.info("Running MoreLikeThis"); + MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").exclude(false).minTermFreq(1).minDocFreq(1) + .addItem(new MoreLikeThisQueryBuilder.Item("test", "type0", "0")); + + String[] types = new String[numOfTypes]; + for (int i = 0; i < numOfTypes; i++) { + types[i] = "type"+i; + } + SearchResponse mltResponse = client().prepareSearch().setTypes(types).setQuery(queryBuilder).execute().actionGet(); + assertHitCount(mltResponse, numOfTypes); + } + }