More Like This Query: defaults to all possible fields for items

Items with no specified fields now default to all the possible fields from the
document source. Previously, we required 'fields' to be specified either
as a top level parameter or for each item. The default behavior is now similar
to the MLT API.

Closes #7382
This commit is contained in:
Alex Ksikes 2014-08-21 19:29:26 +02:00
parent a1a9aadab5
commit e78694ae82
2 changed files with 23 additions and 11 deletions

View File

@ -46,7 +46,6 @@ If only one document is specified, the query behaves the same as the
} }
-------------------------------------------------- --------------------------------------------------
`more_like_this` can be shortened to `mlt`. `more_like_this` can be shortened to `mlt`.
Under the hood, `more_like_this` simply creates multiple `should` clauses in a `bool` query of Under the hood, `more_like_this` simply creates multiple `should` clauses in a `bool` query of
@ -61,26 +60,29 @@ such as `min_word_length`, `max_word_length` or `stop_words`, to control what
terms should be considered as interesting. In order to give more weight to terms should be considered as interesting. In order to give more weight to
more interesting terms, each boolean clause associated with a term could be more interesting terms, each boolean clause associated with a term could be
boosted by the term tf-idf score times some boosting factor `boost_terms`. boosted by the term tf-idf score times some boosting factor `boost_terms`.
When a search for multiple `docs` is issued, More Like This generates a When a search for multiple `docs` is issued, More Like This generates a
`more_like_this` query per document field in `fields`. These `fields` are `more_like_this` query per document field in `fields`. These `fields` are
specified as a top level parameter or within each `doc`. specified as a top level parameter or within each `doc`.
IMPORTANT: The fields must be indexed and of type `string`. Additionally, when
using `ids` or `docs`, the fields must either be `stored` or store `term_vector`,
or `_source` must be enabled.
The `more_like_this` top level parameters include: The `more_like_this` top level parameters include:
[cols="<,<",options="header",] [cols="<,<",options="header",]
|======================================================================= |=======================================================================
|Parameter |Description |Parameter |Description
|`fields` |A list of the fields to run the more like this query against. |`fields` |A list of the fields to run the more like this query against.
Defaults to the `_all` field. Defaults to the `_all` field for `like_text` and to all possible fields
for `ids` or `docs`.
|`like_text` |The text to find documents like it, *required* if `ids` or `docs` are |`like_text` |The text to find documents like it, *required* if `ids` or `docs` are
not specified. not specified.
|`ids` or `docs` |A list of documents following the same syntax as the |`ids` or `docs` |A list of documents following the same syntax as the
<<docs-multi-get,Multi GET API>>. This parameter is *required* if <<docs-multi-get,Multi GET API>>. The text is fetched from `fields`
`like_text` is not specified. The texts are fetched from `fields` unless unless specified otherwise in each `doc`.
specified in each `doc`, and cannot be set to `_all`.
|`include` |When using `ids` or `docs`, specifies whether the documents should be |`include` |When using `ids` or `docs`, specifies whether the documents should be
included in the search results. Defaults to `false`. included in the search results. Defaults to `false`.

View File

@ -164,28 +164,34 @@ public class MoreLikeThisQueryParser implements QueryParser {
if (mltQuery.getLikeText() == null && items.isEmpty()) { if (mltQuery.getLikeText() == null && items.isEmpty()) {
throw new QueryParsingException(parseContext.index(), "more_like_this requires at least 'like_text' or 'ids/docs' to be specified"); throw new QueryParsingException(parseContext.index(), "more_like_this requires at least 'like_text' or 'ids/docs' to be specified");
} }
if (moreLikeFields != null && moreLikeFields.isEmpty()) {
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be non-empty");
}
// set analyzer
if (analyzer == null) { if (analyzer == null) {
analyzer = parseContext.mapperService().searchAnalyzer(); analyzer = parseContext.mapperService().searchAnalyzer();
} }
mltQuery.setAnalyzer(analyzer); mltQuery.setAnalyzer(analyzer);
if (moreLikeFields == null) { // set like text fields
boolean useDefaultField = (moreLikeFields == null);
if (useDefaultField) {
moreLikeFields = Lists.newArrayList(parseContext.defaultField()); moreLikeFields = Lists.newArrayList(parseContext.defaultField());
} else if (moreLikeFields.isEmpty()) {
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be non-empty");
} }
// possibly remove unsupported fields
removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField); removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField);
if (moreLikeFields.isEmpty()) { if (moreLikeFields.isEmpty()) {
return null; return null;
} }
mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY)); mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
// support for named query
if (queryName != null) { if (queryName != null) {
parseContext.addNamedQuery(queryName, mltQuery); parseContext.addNamedQuery(queryName, mltQuery);
} }
// handle items
if (!items.isEmpty()) { if (!items.isEmpty()) {
// set default index, type and fields if not specified // set default index, type and fields if not specified
for (MultiGetRequest.Item item : items) { for (MultiGetRequest.Item item : items) {
@ -201,7 +207,11 @@ public class MoreLikeThisQueryParser implements QueryParser {
} }
} }
if (item.fields() == null && item.fetchSourceContext() == null) { if (item.fields() == null && item.fetchSourceContext() == null) {
item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()])); if (useDefaultField) {
item.fields("*");
} else {
item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
}
} }
} }
// fetching the items with multi-termvectors API // fetching the items with multi-termvectors API