diff --git a/docs/reference/docs/termvectors.asciidoc b/docs/reference/docs/termvectors.asciidoc index 876930f64b7..ecc7a6037a3 100644 --- a/docs/reference/docs/termvectors.asciidoc +++ b/docs/reference/docs/termvectors.asciidoc @@ -254,6 +254,7 @@ curl -XGET 'http://localhost:9200/twitter/tweet/1/_termvector?pretty=true' -d '{ -------------------------------------------------- [float] +[[docs-termvectors-artificial-doc]] === Example 3 Term vectors can also be generated for artificial documents, diff --git a/docs/reference/query-dsl/queries/mlt-query.asciidoc b/docs/reference/query-dsl/queries/mlt-query.asciidoc index 04276517917..a36e8d3c075 100644 --- a/docs/reference/query-dsl/queries/mlt-query.asciidoc +++ b/docs/reference/query-dsl/queries/mlt-query.asciidoc @@ -9,7 +9,7 @@ running it against one or more fields. { "more_like_this" : { "fields" : ["name.first", "name.last"], - "like_text" : "text like this one", + "like" : "text like this one", "min_term_freq" : 1, "max_query_terms" : 12 } @@ -18,7 +18,7 @@ running it against one or more fields. More Like This can find documents that are "like" a set of chosen documents. The syntax to specify one or more documents is similar to -the <<docs-multi-get,Multi GET API>>, and supports the `ids` or `docs` array. +the <<docs-multi-get,Multi GET API>>. If only one document is specified, the query behaves the same as the <<search-more-like-this,More Like This API>>. @@ -27,7 +27,7 @@ If only one document is specified, the query behaves the same as the { "more_like_this" : { "fields" : ["name.first", "name.last"], - "docs" : [ + "like" : [ { "_index" : "test", "_type" : "type", @@ -37,26 +37,24 @@ If only one document is specified, the query behaves the same as the "_index" : "test", "_type" : "type", "_id" : "2" - } + }, + "and also some text like this one!" ], - "ids" : ["3", "4"], "min_term_freq" : 1, "max_query_terms" : 12 } } -------------------------------------------------- -Additionally, the `doc` syntax of the -<<docs-multi-termvectors,Multi Term Vectors API>> is also supported. 
This is useful in -order to specify one or more documents not present in the index, and in -this case should be preferred over only using `like_text`. +Additionally, <<docs-termvectors-artificial-doc,artificial documents>> are also supported. +This is useful in order to specify one or more documents not present in the index. [source,js] -------------------------------------------------- { "more_like_this" : { "fields" : ["name.first", "name.last"], - "docs" : [ + "like" : [ { "_index" : "test", "_type" : "type", @@ -89,18 +87,18 @@ selected with respect to their tf-idf scores. These are controlled by `min_term_freq`, `min_doc_freq`, and `max_doc_freq`. The number of interesting terms is controlled by `max_query_terms`. While the minimum number of clauses that must be satisfied is controlled by `percent_terms_to_match`. The terms -are extracted from `like_text` which is analyzed by the analyzer associated +are extracted from the text in `like` and analyzed by the analyzer associated with the field, unless specified by `analyzer`. There are other parameters, such as `min_word_length`, `max_word_length` or `stop_words`, to control what terms should be considered as interesting. In order to give more weight to more interesting terms, each boolean clause associated with a term could be boosted by the term tf-idf score times some boosting factor `boost_terms`. -When a search for multiple `docs` is issued, More Like This generates a +When a search for multiple documents is issued, More Like This generates a `more_like_this` query per document field in `fields`. These `fields` are -specified as a top level parameter or within each `doc`. +specified as a top level parameter or within each document request. IMPORTANT: The fields must be indexed and of type `string`. Additionally, when -using `ids` or `docs`, the fields must be either `stored`, store `term_vector` +using `like` with documents, the fields must be either `stored`, store `term_vector` or `_source` must be enabled. 
The `more_like_this` top level parameters include: @@ -109,19 +107,29 @@ The `more_like_this` top level parameters include: |======================================================================= |Parameter |Description |`fields` |A list of the fields to run the more like this query against. -Defaults to the `_all` field for `like_text` and to all possible fields -for `ids` or `docs`. +Defaults to the `_all` field for text and to all possible fields +for documents. -|`like_text` |The text to find documents like it, *required* if `ids` or `docs` are +|`like`|coming[1.5.0] +Can either be some text, some documents or a combination of both, *required*. +A document request follows the same syntax as the +<<docs-multi-get,Multi GET API>> or <<docs-multi-termvectors,Multi Term Vectors API>>. +In this case, the text is fetched from `fields` unless specified otherwise in each document request. +The text is analyzed by the default analyzer at the field, unless overridden by the +`per_field_analyzer` parameter of the <<docs-termvectors-per-field-analyzer,Term Vectors API>>. + +|`like_text` |deprecated[1.5.0,Replaced by `like`] +The text to find documents like it, *required* if `ids` or `docs` are not specified. -|`ids` or `docs` |A list of documents following the same syntax as the -<<docs-multi-get,Multi GET API>> or <<docs-multi-termvectors,Multi Term Vectors API>>. +|`ids` or `docs` |deprecated[1.5.0,Replaced by `like`] +A list of documents following the same syntax as the +<<docs-multi-get,Multi GET API>> or <<docs-multi-termvectors,Multi Term Vectors API>>. The text is fetched from `fields` unless specified otherwise in each `doc`. The text is analyzed by the default analyzer at the field, unless specified by the `per_field_analyzer` parameter of the <<docs-termvectors-per-field-analyzer,Term Vectors API>>. -|`include` |When using `ids` or `docs`, specifies whether the documents should be +|`include` |When using `like` with document requests, specifies whether the documents should be included from the search. Defaults to `false`. 
|`minimum_should_match`| From the generated query, the number of terms that diff --git a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java index 4396f6abbfc..af391873499 100644 --- a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java @@ -19,7 +19,10 @@ package org.elasticsearch.index.query; +import com.google.common.collect.Lists; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.action.get.MultiGetRequest; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.bytes.BytesReference; @@ -45,6 +48,7 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta */ public static final class Item extends MultiGetRequest.Item implements ToXContent { private BytesReference doc; + private String likeText; public Item() { super(); @@ -54,6 +58,10 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta super(index, type, id); } + public Item(String likeText) { + this.likeText = likeText; + } + public BytesReference doc() { return doc; } @@ -65,10 +73,16 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (this.likeText != null) { + return builder.value(this.likeText); + } builder.startObject(); if (this.index() != null) { builder.field("_index", this.index()); } + if (this.type() != null) { + builder.field("_type", this.type()); + } if (this.id() != null) { builder.field("_id", this.id()); } @@ -83,9 +97,6 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta builder.copyCurrentStructure(parser); } 
} - if (this.type() != null) { - builder.field("_type", this.type()); - } if (this.fields() != null) { builder.array("fields", this.fields()); } @@ -120,9 +131,6 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta } private final String[] fields; - - private String likeText; - private List ids = new ArrayList<>(); private List docs = new ArrayList<>(); private Boolean include = null; private String minimumShouldMatch = null; @@ -155,29 +163,51 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta this.fields = fields; } - /** - * The text to use in order to find documents that are "like" this. - */ - public MoreLikeThisQueryBuilder likeText(String likeText) { - this.likeText = likeText; - return this; - } - - public MoreLikeThisQueryBuilder ids(String... ids) { - this.ids = Arrays.asList(ids); - return this; - } - - public MoreLikeThisQueryBuilder docs(Item... docs) { + public MoreLikeThisQueryBuilder like(Item... docs) { this.docs = Arrays.asList(docs); return this; } + public MoreLikeThisQueryBuilder like(String... likeText) { + this.docs = new ArrayList<>(); + for (String text : likeText) { + this.docs.add(new Item(text)); + } + return this; + } + public MoreLikeThisQueryBuilder addItem(Item item) { this.docs.add(item); return this; } + public MoreLikeThisQueryBuilder addLikeText(String likeText) { + this.docs.add(new Item(likeText)); + return this; + } + + /** + * The text to use in order to find documents that are "like" this. + */ + @Deprecated + public MoreLikeThisQueryBuilder likeText(String likeText) { + return like(likeText); + } + + @Deprecated + public MoreLikeThisQueryBuilder ids(String... ids) { + Item[] items = new Item[ids.length]; + for (int i = 0; i < items.length; i++) { + items[i] = new Item(null, null, ids[i]); + } + return like(items); + } + + @Deprecated + public MoreLikeThisQueryBuilder docs(Item... 
docs) { + return like(docs); + } + public MoreLikeThisQueryBuilder include(boolean include) { this.include = include; return this; @@ -307,6 +337,7 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta @Override protected void doXContent(XContentBuilder builder, Params params) throws IOException { + String likeFieldName = MoreLikeThisQueryParser.Fields.LIKE.getPreferredName(); builder.startObject(MoreLikeThisQueryParser.NAME); if (fields != null) { builder.startArray("fields"); @@ -315,11 +346,15 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta } builder.endArray(); } - if (likeText == null && this.docs.isEmpty() && this.ids.isEmpty()) { - throw new ElasticsearchIllegalArgumentException("more_like_this requires either '"+ - MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' or 'docs/ids' to be provided"); + if (this.docs.isEmpty()) { + throw new ElasticsearchIllegalArgumentException("more_like_this requires '" + likeFieldName + "' to be provided"); + } else { + if (docs.size() == 1) { + builder.field(likeFieldName, docs); + } else { + builder.array(likeFieldName, docs); + } } - builder.field(MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName(), likeText); if (minimumShouldMatch != null) { builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch); } @@ -363,12 +398,6 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta if (queryName != null) { builder.field("_name", queryName); } - if (!ids.isEmpty()) { - builder.array("ids", ids.toArray()); - } - if (!docs.isEmpty()) { - builder.array("docs", docs.toArray()); - } if (include != null) { builder.field("include", include); } diff --git a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java index e3f07b33d78..dddb4888248 100644 --- 
a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java @@ -42,6 +42,7 @@ import org.elasticsearch.index.mapper.internal.UidFieldMapper; import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService; import java.io.IOException; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Set; @@ -55,7 +56,7 @@ public class MoreLikeThisQueryParser implements QueryParser { private MoreLikeThisFetchService fetchService = null; public static class Fields { - public static final ParseField LIKE_TEXT = new ParseField("like_text"); + public static final ParseField LIKE_TEXT = new ParseField("like_text").withAllDeprecated("like"); public static final ParseField MIN_TERM_FREQ = new ParseField("min_term_freq"); public static final ParseField MAX_QUERY_TERMS = new ParseField("max_query_terms"); public static final ParseField MIN_WORD_LENGTH = new ParseField("min_word_length", "min_word_len"); @@ -67,8 +68,9 @@ public class MoreLikeThisQueryParser implements QueryParser { public static final ParseField PERCENT_TERMS_TO_MATCH = new ParseField("percent_terms_to_match"); public static final ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field"); public static final ParseField STOP_WORDS = new ParseField("stop_words"); - public static final ParseField DOCUMENT_IDS = new ParseField("ids"); - public static final ParseField DOCUMENTS = new ParseField("docs"); + public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like"); + public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like"); + public static final ParseField LIKE = new ParseField("like"); public static final ParseField INCLUDE = new ParseField("include"); } @@ -100,13 +102,18 @@ public class MoreLikeThisQueryParser implements QueryParser { XContentParser.Token token; String currentFieldName 
= null; + + List likeTexts = new ArrayList<>(); MultiTermVectorsRequest items = new MultiTermVectorsRequest(); + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token.isValue()) { if (Fields.LIKE_TEXT.match(currentFieldName, parseContext.parseFlags())) { - mltQuery.setLikeText(parser.text()); + likeTexts.add(parser.text()); + } else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) { + parseLikeField(parser, likeTexts, items); } else if (Fields.MIN_TERM_FREQ.match(currentFieldName, parseContext.parseFlags())) { mltQuery.setMinTermFrequency(parser.intValue()); } else if (Fields.MAX_QUERY_TERMS.match(currentFieldName, parseContext.parseFlags())) { @@ -166,15 +173,25 @@ public class MoreLikeThisQueryParser implements QueryParser { if (token != XContentParser.Token.START_OBJECT) { throw new ElasticsearchIllegalArgumentException("docs array element should include an object"); } - items.add(parseDocuments(parser)); + items.add(parseDocument(parser)); } + } else if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + parseLikeField(parser, likeTexts, items); + } + } else { + throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]"); + } + } else if (token == XContentParser.Token.START_OBJECT) { + if (Fields.LIKE.match(currentFieldName, parseContext.parseFlags())) { + parseLikeField(parser, likeTexts, items); } else { throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]"); } } } - if (mltQuery.getLikeText() == null && items.isEmpty()) { + if (likeTexts.isEmpty() && items.isEmpty()) { throw new QueryParsingException(parseContext.index(), "more_like_this requires at least 'like_text' or 'ids/docs' to be specified"); } if 
(moreLikeFields != null && moreLikeFields.isEmpty()) { @@ -204,6 +221,11 @@ public class MoreLikeThisQueryParser implements QueryParser { parseContext.addNamedQuery(queryName, mltQuery); } + // handle like texts + if (!likeTexts.isEmpty()) { + mltQuery.setLikeText(likeTexts); + } + // handle items if (!items.isEmpty()) { // set default index, type and fields if not specified @@ -245,6 +267,22 @@ public class MoreLikeThisQueryParser implements QueryParser { return mltQuery; } + private TermVectorRequest parseDocument(XContentParser parser) throws IOException { + TermVectorRequest termVectorRequest = newTermVectorRequest(); + TermVectorRequest.parseRequest(termVectorRequest, parser); + return termVectorRequest; + } + + private void parseLikeField(XContentParser parser, List likeTexts, MultiTermVectorsRequest items) throws IOException { + if (parser.currentToken().isValue()) { + likeTexts.add(parser.text()); + } else if (parser.currentToken() == XContentParser.Token.START_OBJECT) { + items.add(parseDocument(parser)); + } else { + throw new ElasticsearchIllegalArgumentException("Content of 'like' parameter should either be a string or an object"); + } + } + private TermVectorRequest newTermVectorRequest() { return new TermVectorRequest() .positions(false) @@ -254,12 +292,6 @@ public class MoreLikeThisQueryParser implements QueryParser { .termStatistics(false); } - private TermVectorRequest parseDocuments(XContentParser parser) throws IOException { - TermVectorRequest termVectorRequest = newTermVectorRequest(); - TermVectorRequest.parseRequest(termVectorRequest, parser); - return termVectorRequest; - } - private List removeUnsupportedFields(List moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException { for (Iterator it = moreLikeFields.iterator(); it.hasNext(); ) { final String fieldName = it.next();