More Like This Query: defaults to all possible fields for items
Items with no specified fields now default to all the possible fields from the document source. Previously, we required 'fields' to be specified either as a top level parameter or for each item. The default behavior is now similar to the MLT API. Closes #7382
This commit is contained in:
parent
a1a9aadab5
commit
e78694ae82
|
@ -46,7 +46,6 @@ If only one document is specified, the query behaves the same as the
|
||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
`more_like_this` can be shortened to `mlt`.
|
`more_like_this` can be shortened to `mlt`.
|
||||||
|
|
||||||
Under the hood, `more_like_this` simply creates multiple `should` clauses in a `bool` query of
|
Under the hood, `more_like_this` simply creates multiple `should` clauses in a `bool` query of
|
||||||
|
@ -61,26 +60,29 @@ such as `min_word_length`, `max_word_length` or `stop_words`, to control what
|
||||||
terms should be considered as interesting. In order to give more weight to
|
terms should be considered as interesting. In order to give more weight to
|
||||||
more interesting terms, each boolean clause associated with a term could be
|
more interesting terms, each boolean clause associated with a term could be
|
||||||
boosted by the term tf-idf score times some boosting factor `boost_terms`.
|
boosted by the term tf-idf score times some boosting factor `boost_terms`.
|
||||||
|
|
||||||
When a search for multiple `docs` is issued, More Like This generates a
|
When a search for multiple `docs` is issued, More Like This generates a
|
||||||
`more_like_this` query per document field in `fields`. These `fields` are
|
`more_like_this` query per document field in `fields`. These `fields` are
|
||||||
specified as a top level parameter or within each `doc`.
|
specified as a top level parameter or within each `doc`.
|
||||||
|
|
||||||
|
IMPORTANT: The fields must be indexed and of type `string`. Additionally, when
|
||||||
|
using `ids` or `docs`, the fields must either be `stored` or store `term_vector`,
|
||||||
|
or `_source` must be enabled.
|
||||||
|
|
||||||
The `more_like_this` top level parameters include:
|
The `more_like_this` top level parameters include:
|
||||||
|
|
||||||
[cols="<,<",options="header",]
|
[cols="<,<",options="header",]
|
||||||
|=======================================================================
|
|=======================================================================
|
||||||
|Parameter |Description
|
|Parameter |Description
|
||||||
|`fields` |A list of the fields to run the more like this query against.
|
|`fields` |A list of the fields to run the more like this query against.
|
||||||
Defaults to the `_all` field.
|
Defaults to the `_all` field for `like_text` and to all possible fields
|
||||||
|
for `ids` or `docs`.
|
||||||
|
|
||||||
|`like_text` |The text to find documents like it, *required* if `ids` or `docs` are
|
|`like_text` |The text to find documents like it, *required* if `ids` or `docs` are
|
||||||
not specified.
|
not specified.
|
||||||
|
|
||||||
|`ids` or `docs` |A list of documents following the same syntax as the
|
|`ids` or `docs` |A list of documents following the same syntax as the
|
||||||
<<docs-multi-get,Multi GET API>>. This parameter is *required* if
|
<<docs-multi-get,Multi GET API>>. The text is fetched from `fields`
|
||||||
`like_text` is not specified. The texts are fetched from `fields` unless
|
unless specified otherwise in each `doc`.
|
||||||
specified in each `doc`, and cannot be set to `_all`.
|
|
||||||
|
|
||||||
|`include` |When using `ids` or `docs`, specifies whether the documents should be
|
|`include` |When using `ids` or `docs`, specifies whether the documents should be
|
||||||
included in the search results. Defaults to `false`.
|
included in the search results. Defaults to `false`.
|
||||||
|
|
|
@ -164,28 +164,34 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||||
if (mltQuery.getLikeText() == null && items.isEmpty()) {
|
if (mltQuery.getLikeText() == null && items.isEmpty()) {
|
||||||
throw new QueryParsingException(parseContext.index(), "more_like_this requires at least 'like_text' or 'ids/docs' to be specified");
|
throw new QueryParsingException(parseContext.index(), "more_like_this requires at least 'like_text' or 'ids/docs' to be specified");
|
||||||
}
|
}
|
||||||
|
if (moreLikeFields != null && moreLikeFields.isEmpty()) {
|
||||||
|
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be non-empty");
|
||||||
|
}
|
||||||
|
|
||||||
|
// set analyzer
|
||||||
if (analyzer == null) {
|
if (analyzer == null) {
|
||||||
analyzer = parseContext.mapperService().searchAnalyzer();
|
analyzer = parseContext.mapperService().searchAnalyzer();
|
||||||
}
|
}
|
||||||
mltQuery.setAnalyzer(analyzer);
|
mltQuery.setAnalyzer(analyzer);
|
||||||
|
|
||||||
if (moreLikeFields == null) {
|
// set like text fields
|
||||||
|
boolean useDefaultField = (moreLikeFields == null);
|
||||||
|
if (useDefaultField) {
|
||||||
moreLikeFields = Lists.newArrayList(parseContext.defaultField());
|
moreLikeFields = Lists.newArrayList(parseContext.defaultField());
|
||||||
} else if (moreLikeFields.isEmpty()) {
|
|
||||||
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be non-empty");
|
|
||||||
}
|
}
|
||||||
|
// possibly remove unsupported fields
|
||||||
removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField);
|
removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField);
|
||||||
if (moreLikeFields.isEmpty()) {
|
if (moreLikeFields.isEmpty()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
|
mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
|
||||||
|
|
||||||
|
// support for named query
|
||||||
if (queryName != null) {
|
if (queryName != null) {
|
||||||
parseContext.addNamedQuery(queryName, mltQuery);
|
parseContext.addNamedQuery(queryName, mltQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handle items
|
||||||
if (!items.isEmpty()) {
|
if (!items.isEmpty()) {
|
||||||
// set default index, type and fields if not specified
|
// set default index, type and fields if not specified
|
||||||
for (MultiGetRequest.Item item : items) {
|
for (MultiGetRequest.Item item : items) {
|
||||||
|
@ -201,7 +207,11 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (item.fields() == null && item.fetchSourceContext() == null) {
|
if (item.fields() == null && item.fetchSourceContext() == null) {
|
||||||
item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
|
if (useDefaultField) {
|
||||||
|
item.fields("*");
|
||||||
|
} else {
|
||||||
|
item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// fetching the items with multi-termvectors API
|
// fetching the items with multi-termvectors API
|
||||||
|
|
Loading…
Reference in New Issue