More Like This Query: Added searching for multiple items.

The syntax to specify one or more items is the same as for the Multi GET API.
If only one document is specified, the results returned are the same as when
using the More Like This API.

Relates #4075 Closes #5857
This commit is contained in:
Alex Ksikes 2014-04-17 17:09:20 +02:00
parent a3581959d7
commit db991dc3a4
15 changed files with 942 additions and 133 deletions

View File

@ -106,7 +106,7 @@ curl 'localhost:9200/_mget' -d '{
"_id" : "3",
"_source" : {
"include": ["user"],
"_exclude": ["user.location"]
"exclude": ["user.location"]
}
}
]

View File

@ -16,6 +16,37 @@ running it against one or more fields.
}
--------------------------------------------------
Additionally, More Like This can find documents that are "like" a set of
chosen documents. The syntax to specify one or more documents is similar to
the <<docs-multi-get,Multi GET API>>, and supports the `ids` or `docs` array.
If only one document is specified, the query behaves the same as the
<<search-more-like-this,More Like This API>>.
[source,js]
--------------------------------------------------
{
"more_like_this" : {
"fields" : ["name.first", "name.last"],
"docs" : [
{
"_index" : "test",
"_type" : "type",
"_id" : "1"
},
{
"_index" : "test",
"_type" : "type",
"_id" : "2"
}
],
"ids" : ["3", "4"],
"min_term_freq" : 1,
"max_query_terms" : 12
}
}
--------------------------------------------------
`more_like_this` can be shortened to `mlt`.
Under the hood, `more_like_this` simply creates multiple `should` clauses in a `bool` query of
@ -31,6 +62,10 @@ terms should be considered as interesting. In order to give more weight to
more interesting terms, each boolean clause associated with a term could be
boosted by the term tf-idf score times some boosting factor `boost_terms`.
When a search for multiple `docs` is issued, More Like This generates a
`more_like_this` query for each field of each document, as restricted by `fields`.
These `fields` are specified as a top-level parameter or within each `doc`.
The `more_like_this` top level parameters include:
[cols="<,<",options="header",]
@ -39,7 +74,16 @@ The `more_like_this` top level parameters include:
|`fields` |A list of the fields to run the more like this query against.
Defaults to the `_all` field.
|`like_text` |The text to find documents like it, *required*.
|`like_text` |The text to find documents like it, *required* if `ids` is
not specified.
|`ids` or `docs` |A list of documents following the same syntax as the
<<docs-multi-get,Multi GET API>>. This parameter is *required* if
`like_text` is not specified. The texts are fetched from `fields` unless
specified in each `doc`, and cannot be set to `_all`.
|`exclude` |When using `ids` or `docs`, specifies whether the queried
documents should be excluded from the returned results. Defaults to `true`.
|`percent_terms_to_match` |The percentage of terms to match on (float
value). Defaults to `0.3` (30 percent).

View File

@ -40,6 +40,7 @@ import org.elasticsearch.search.fetch.source.FetchSourceContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
@ -58,7 +59,7 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
private VersionType versionType = VersionType.INTERNAL;
private FetchSourceContext fetchSourceContext;
Item() {
public Item() {
}
@ -88,6 +89,11 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
return this.type;
}
/**
 * Sets the mapping type of the document to get.
 *
 * @param type the mapping type; may be null (a default may be applied later)
 * @return this item, for chaining
 */
public Item type(String type) {
this.type = type;
return this;
}
/**
 * Returns the id of the document to get.
 */
public String id() {
return this.id;
}
@ -195,6 +201,39 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
FetchSourceContext.optionalWriteToStream(fetchSourceContext, out);
}
/**
 * Equality is based on index, type, id, routing, fields, version, versionType
 * and fetchSourceContext. All comparisons are null-safe so a partially built
 * item compares as unequal instead of throwing a NullPointerException.
 *
 * NOTE(review): 'parent' is not part of equals/hashCode — confirm this is intentional.
 */
@Override
public boolean equals(Object o) {
    if (this == o) return true;
    if (!(o instanceof Item)) return false;

    Item item = (Item) o;

    if (version != item.version) return false;
    if (versionType != item.versionType) return false;
    if (!Arrays.equals(fields, item.fields)) return false;
    // null-safe: id and index may legitimately be unset on a partially built
    // item; equals must return false rather than throw (same pattern already
    // used below for routing/type/fetchSourceContext).
    if (id != null ? !id.equals(item.id) : item.id != null) return false;
    if (index != null ? !index.equals(item.index) : item.index != null) return false;
    if (routing != null ? !routing.equals(item.routing) : item.routing != null) return false;
    if (type != null ? !type.equals(item.type) : item.type != null) return false;
    if (fetchSourceContext != null ? !fetchSourceContext.equals(item.fetchSourceContext) : item.fetchSourceContext != null)
        return false;

    return true;
}
/**
 * Hash code over the same fields as {@link #equals(Object)}; null-safe so a
 * partially built item (e.g. no index or id yet) can still be hashed.
 */
@Override
public int hashCode() {
    // keep the field set in sync with equals()
    int result = index != null ? index.hashCode() : 0;
    result = 31 * result + (type != null ? type.hashCode() : 0);
    result = 31 * result + (id != null ? id.hashCode() : 0);
    result = 31 * result + (routing != null ? routing.hashCode() : 0);
    result = 31 * result + (fields != null ? Arrays.hashCode(fields) : 0);
    result = 31 * result + (int) (version ^ (version >>> 32));
    result = 31 * result + (versionType != null ? versionType.hashCode() : 0);
    result = 31 * result + (fetchSourceContext != null ? fetchSourceContext.hashCode() : 0);
    return result;
}
}
private boolean listenerThreaded = false;
@ -205,6 +244,10 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
List<Item> items = new ArrayList<>();
/**
 * Returns the live, mutable list of items in this request (not a defensive copy).
 */
public List<Item> getItems() {
return this.items;
}
public MultiGetRequest add(Item item) {
items.add(item);
return this;
@ -287,115 +330,9 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.START_ARRAY) {
if ("docs".equals(currentFieldName)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token != XContentParser.Token.START_OBJECT) {
throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
}
String index = defaultIndex;
String type = defaultType;
String id = null;
String routing = defaultRouting;
String parent = null;
List<String> fields = null;
long version = Versions.MATCH_ANY;
VersionType versionType = VersionType.INTERNAL;
FetchSourceContext fetchSourceContext = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token.isValue()) {
if ("_index".equals(currentFieldName)) {
if (!allowExplicitIndex) {
throw new ElasticsearchIllegalArgumentException("explicit index in multi get is not allowed");
}
index = parser.text();
} else if ("_type".equals(currentFieldName)) {
type = parser.text();
} else if ("_id".equals(currentFieldName)) {
id = parser.text();
} else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) {
routing = parser.text();
} else if ("_parent".equals(currentFieldName) || "parent".equals(currentFieldName)) {
parent = parser.text();
} else if ("fields".equals(currentFieldName)) {
fields = new ArrayList<>();
fields.add(parser.text());
} else if ("_version".equals(currentFieldName) || "version".equals(currentFieldName)) {
version = parser.longValue();
} else if ("_version_type".equals(currentFieldName) || "_versionType".equals(currentFieldName) || "version_type".equals(currentFieldName) || "versionType".equals(currentFieldName)) {
versionType = VersionType.fromString(parser.text());
} else if ("_source".equals(currentFieldName)) {
if (parser.isBooleanValue()) {
fetchSourceContext = new FetchSourceContext(parser.booleanValue());
} else if (token == XContentParser.Token.VALUE_STRING) {
fetchSourceContext = new FetchSourceContext(new String[]{parser.text()});
} else {
throw new ElasticsearchParseException("illegal type for _source: [" + token + "]");
}
}
} else if (token == XContentParser.Token.START_ARRAY) {
if ("fields".equals(currentFieldName)) {
fields = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
fields.add(parser.text());
}
} else if ("_source".equals(currentFieldName)) {
ArrayList<String> includes = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
includes.add(parser.text());
}
fetchSourceContext = new FetchSourceContext(includes.toArray(Strings.EMPTY_ARRAY));
}
} else if (token == XContentParser.Token.START_OBJECT) {
if ("_source".equals(currentFieldName)) {
List<String> currentList = null, includes = null, excludes = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
if ("includes".equals(currentFieldName) || "include".equals(currentFieldName)) {
currentList = includes != null ? includes : (includes = new ArrayList<>(2));
} else if ("excludes".equals(currentFieldName) || "exclude".equals(currentFieldName)) {
currentList = excludes != null ? excludes : (excludes = new ArrayList<>(2));
} else {
throw new ElasticsearchParseException("Source definition may not contain " + parser.text());
}
} else if (token == XContentParser.Token.START_ARRAY) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
currentList.add(parser.text());
}
} else if (token.isValue()) {
currentList.add(parser.text());
} else {
throw new ElasticsearchParseException("unexpected token while parsing source settings");
}
}
fetchSourceContext = new FetchSourceContext(
includes == null ? Strings.EMPTY_ARRAY : includes.toArray(new String[includes.size()]),
excludes == null ? Strings.EMPTY_ARRAY : excludes.toArray(new String[excludes.size()]));
}
}
}
String[] aFields;
if (fields != null) {
aFields = fields.toArray(new String[fields.size()]);
} else {
aFields = defaultFields;
}
add(new Item(index, type, id).routing(routing).fields(aFields).parent(parent).version(version).versionType(versionType)
.fetchSourceContext(fetchSourceContext == null ? defaultFetchSource : fetchSourceContext));
}
parseDocuments(parser, this.items, defaultIndex, defaultType, defaultFields, defaultFetchSource, defaultRouting, allowExplicitIndex);
} else if ("ids".equals(currentFieldName)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (!token.isValue()) {
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
}
add(new Item(defaultIndex, defaultType, parser.text()).fields(defaultFields).fetchSourceContext(defaultFetchSource).routing(defaultRouting));
}
parseIds(parser, this.items, defaultIndex, defaultType, defaultFields, defaultFetchSource, defaultRouting);
}
}
}
@ -403,6 +340,131 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
return this;
}
/**
 * Parses a "docs" array (parser positioned inside the array) and adds one {@link Item}
 * per array element to {@code items}. Each element must be an object and may set
 * _index, _type, _id, _routing/routing, _parent/parent, fields, _version/version,
 * _version_type/version_type and _source; any value not given falls back to the
 * supplied defaults.
 *
 * @param allowExplicitIndex when false, an explicit "_index" inside a doc is rejected
 * @throws IOException on underlying parser failures
 */
public static void parseDocuments(XContentParser parser, List<Item> items, @Nullable String defaultIndex, @Nullable String defaultType, @Nullable String[] defaultFields, @Nullable FetchSourceContext defaultFetchSource, @Nullable String defaultRouting, boolean allowExplicitIndex) throws IOException {
String currentFieldName = null;
XContentParser.Token token;
// one iteration per doc object in the array
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token != XContentParser.Token.START_OBJECT) {
throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
}
// per-doc state, seeded from the defaults
String index = defaultIndex;
String type = defaultType;
String id = null;
String routing = defaultRouting;
String parent = null;
List<String> fields = null;
long version = Versions.MATCH_ANY;
VersionType versionType = VersionType.INTERNAL;
FetchSourceContext fetchSourceContext = null;
// walk the doc object field by field
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token.isValue()) {
if ("_index".equals(currentFieldName)) {
if (!allowExplicitIndex) {
throw new ElasticsearchIllegalArgumentException("explicit index in multi get is not allowed");
}
index = parser.text();
} else if ("_type".equals(currentFieldName)) {
type = parser.text();
} else if ("_id".equals(currentFieldName)) {
id = parser.text();
} else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) {
routing = parser.text();
} else if ("_parent".equals(currentFieldName) || "parent".equals(currentFieldName)) {
parent = parser.text();
} else if ("fields".equals(currentFieldName)) {
// "fields" given as a single scalar value
fields = new ArrayList<>();
fields.add(parser.text());
} else if ("_version".equals(currentFieldName) || "version".equals(currentFieldName)) {
version = parser.longValue();
} else if ("_version_type".equals(currentFieldName) || "_versionType".equals(currentFieldName) || "version_type".equals(currentFieldName) || "versionType".equals(currentFieldName)) {
versionType = VersionType.fromString(parser.text());
} else if ("_source".equals(currentFieldName)) {
// "_source" as a scalar: boolean toggle or a single include pattern
if (parser.isBooleanValue()) {
fetchSourceContext = new FetchSourceContext(parser.booleanValue());
} else if (token == XContentParser.Token.VALUE_STRING) {
fetchSourceContext = new FetchSourceContext(new String[]{parser.text()});
} else {
throw new ElasticsearchParseException("illegal type for _source: [" + token + "]");
}
}
} else if (token == XContentParser.Token.START_ARRAY) {
if ("fields".equals(currentFieldName)) {
// "fields" given as an array of field names
fields = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
fields.add(parser.text());
}
} else if ("_source".equals(currentFieldName)) {
// "_source" as an array: treated as include patterns only
ArrayList<String> includes = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
includes.add(parser.text());
}
fetchSourceContext = new FetchSourceContext(includes.toArray(Strings.EMPTY_ARRAY));
}
} else if (token == XContentParser.Token.START_OBJECT) {
if ("_source".equals(currentFieldName)) {
// "_source" as an object: explicit include/exclude lists,
// each accepted as either a scalar or an array
List<String> currentList = null, includes = null, excludes = null;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
if ("includes".equals(currentFieldName) || "include".equals(currentFieldName)) {
currentList = includes != null ? includes : (includes = new ArrayList<>(2));
} else if ("excludes".equals(currentFieldName) || "exclude".equals(currentFieldName)) {
currentList = excludes != null ? excludes : (excludes = new ArrayList<>(2));
} else {
throw new ElasticsearchParseException("Source definition may not contain " + parser.text());
}
} else if (token == XContentParser.Token.START_ARRAY) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
currentList.add(parser.text());
}
} else if (token.isValue()) {
currentList.add(parser.text());
} else {
throw new ElasticsearchParseException("unexpected token while parsing source settings");
}
}
fetchSourceContext = new FetchSourceContext(
includes == null ? Strings.EMPTY_ARRAY : includes.toArray(new String[includes.size()]),
excludes == null ? Strings.EMPTY_ARRAY : excludes.toArray(new String[excludes.size()]));
}
}
}
// explicit per-doc fields win over the defaults
String[] aFields;
if (fields != null) {
aFields = fields.toArray(new String[fields.size()]);
} else {
aFields = defaultFields;
}
items.add(new Item(index, type, id).routing(routing).fields(aFields).parent(parent).version(version).versionType(versionType)
.fetchSourceContext(fetchSourceContext == null ? defaultFetchSource : fetchSourceContext));
}
}
/**
 * Convenience overload: parses a "docs" array with no defaults and with
 * explicit "_index" values allowed.
 */
public static void parseDocuments(XContentParser parser, List<Item> items) throws IOException {
parseDocuments(parser, items, null, null, null, null, null, true);
}
/**
 * Parses an "ids" array (parser positioned inside the array) and adds one
 * {@link Item} per scalar id to {@code items}, applying the supplied defaults
 * for index, type, fields, source fetching and routing.
 *
 * @throws ElasticsearchIllegalArgumentException if an array element is not a scalar value
 */
public static void parseIds(XContentParser parser, List<Item> items, @Nullable String defaultIndex, @Nullable String defaultType, @Nullable String[] defaultFields, @Nullable FetchSourceContext defaultFetchSource, @Nullable String defaultRouting) throws IOException {
    for (XContentParser.Token token = parser.nextToken(); token != XContentParser.Token.END_ARRAY; token = parser.nextToken()) {
        if (!token.isValue()) {
            throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
        }
        Item item = new Item(defaultIndex, defaultType, parser.text())
                .fields(defaultFields)
                .fetchSourceContext(defaultFetchSource)
                .routing(defaultRouting);
        items.add(item);
    }
}
/**
 * Convenience overload: parses an "ids" array with no defaults applied.
 */
public static void parseIds(XContentParser parser, List<Item> items) throws IOException {
parseIds(parser, items, null, null, null, null, null);
}
@Override
public Iterator<Item> iterator() {
return Iterators.unmodifiableIterator(items.iterator());

View File

@ -35,7 +35,9 @@ import org.elasticsearch.action.support.TransportAction;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.*;
import org.elasticsearch.cluster.routing.MutableShardRouting;
import org.elasticsearch.cluster.routing.ShardIterator;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.engine.DocumentMissingException;

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.common.lucene.BytesRefs;
import java.util.Collection;
@ -94,6 +95,15 @@ public final class Uid {
return new Uid(uid.substring(0, delimiterIndex), uid.substring(delimiterIndex + 1));
}
/**
 * Builds the uid bytes for every multi-get item, preserving list order.
 */
public static BytesRef[] createUids(List<MultiGetRequest.Item> items) {
    final int size = items.size();
    BytesRef[] uids = new BytesRef[size];
    for (int i = 0; i < size; i++) {
        uids[i] = createUidAsBytes(items.get(i));
    }
    return uids;
}
/**
 * Builds the uid bytes for the given type and id, joined by the uid delimiter byte.
 */
public static BytesRef createUidAsBytes(String type, String id) {
return createUidAsBytes(new BytesRef(type), new BytesRef(id));
}
@ -102,6 +112,10 @@ public final class Uid {
return createUidAsBytes(new BytesRef(type), id);
}
/**
 * Builds the uid bytes for a multi-get item from its type and id.
 */
public static BytesRef createUidAsBytes(MultiGetRequest.Item item) {
return createUidAsBytes(item.type(), item.id());
}
public static BytesRef createUidAsBytes(BytesRef type, BytesRef id) {
final BytesRef ref = new BytesRef(type.length + 1 + id.length);
System.arraycopy(type.bytes, type.offset, ref.bytes, 0, type.length);

View File

@ -20,9 +20,19 @@
package org.elasticsearch.index.query;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lucene.uid.Versions;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.search.fetch.source.FetchSourceContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
/**
* A more like this query that finds documents that are "like" the provided {@link #likeText(String)}
@ -30,9 +40,69 @@ import java.io.IOException;
*/
public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements BoostableQueryBuilder<MoreLikeThisQueryBuilder> {
/**
* A single get item. Pure delegate to multi get.
*/
// Delegates all item state to MultiGetRequest.Item; only adds the ability to
// render itself back into query DSL (the inverse of MultiGetRequest's parsing).
public static final class Item extends MultiGetRequest.Item implements ToXContent {
public Item() {
super();
}
public Item(String index, @Nullable String type, String id) {
super(index, type, id);
}
/**
 * Serializes this item as a doc object of the more_like_this query DSL.
 * Only non-default values are written, so a round trip through
 * MultiGetRequest.parseDocuments reproduces the same item.
 */
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
if (this.index() != null) {
builder.field("_index", this.index());
}
if (this.id() != null) {
builder.field("_id", this.id());
}
if (this.type() != null) {
builder.field("_type", this.type());
}
if (this.fields() != null) {
builder.array("fields", this.fields());
}
if (this.routing() != null) {
builder.field("_routing", this.routing());
}
if (this.fetchSourceContext() != null) {
// _source is written in its most compact form: a boolean when there are
// no patterns, a plain array for includes only, and an object otherwise.
FetchSourceContext source = this.fetchSourceContext();
String[] includes = source.includes();
String[] excludes = source.excludes();
if (includes.length == 0 && excludes.length == 0) {
builder.field("_source", source.fetchSource());
} else if (includes.length > 0 && excludes.length == 0) {
builder.array("_source", source.includes());
} else if (excludes.length > 0) {
builder.startObject("_source");
if (includes.length > 0) {
builder.array("includes", source.includes());
}
builder.array("excludes", source.excludes());
builder.endObject();
}
}
// version fields are omitted at their defaults (MATCH_ANY / INTERNAL)
if (this.version() != Versions.MATCH_ANY) {
builder.field("_version", this.version());
}
if (this.versionType() != VersionType.INTERNAL) {
builder.field("_version_type", this.versionType().toString().toLowerCase(Locale.ROOT));
}
return builder.endObject();
}
}
private final String[] fields;
private String likeText;
private List<String> ids = new ArrayList<>();
private List<Item> docs = new ArrayList<>();
private Boolean exclude = null;
private float percentTermsToMatch = -1;
private int minTermFreq = -1;
private int maxQueryTerms = -1;
@ -71,6 +141,26 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
return this;
}
/**
 * Sets the ids of the documents whose terms should be used; replaces any
 * previously set ids.
 *
 * @return this builder, for chaining
 */
public MoreLikeThisQueryBuilder ids(String... ids) {
    // Copy into a mutable list: Arrays.asList returns a fixed-size view backed
    // by the caller's array, so later mutation of this.ids would throw
    // UnsupportedOperationException and external array changes would leak in.
    this.ids = new ArrayList<>(Arrays.asList(ids));
    return this;
}
/**
 * Sets the documents whose terms should be used; replaces any previously set docs.
 *
 * @return this builder, for chaining
 */
public MoreLikeThisQueryBuilder docs(Item... docs) {
    // Copy into a mutable list: Arrays.asList returns a fixed-size view backed
    // by the caller's array, so a subsequent addItem(...) on this builder would
    // throw UnsupportedOperationException.
    this.docs = new ArrayList<>(Arrays.asList(docs));
    return this;
}
/**
 * Adds one document item to the list of docs whose terms should be used.
 *
 * @return this builder, for chaining
 */
public MoreLikeThisQueryBuilder addItem(Item item) {
this.docs.add(item);
return this;
}
/**
 * Sets whether the source documents themselves should be excluded from the
 * search results. Left null (unset) unless explicitly called.
 *
 * @return this builder, for chaining
 */
public MoreLikeThisQueryBuilder exclude(boolean exclude) {
this.exclude = exclude;
return this;
}
/**
* The percentage of terms to match. Defaults to <tt>0.3</tt>.
*/
@ -192,9 +282,9 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
}
builder.endArray();
}
if (likeText == null) {
throw new ElasticsearchIllegalArgumentException("moreLikeThis requires '"+
MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' to be provided");
if (likeText == null && this.docs.isEmpty() && this.ids.isEmpty()) {
throw new ElasticsearchIllegalArgumentException("more_like_this requires either '"+
MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' or 'docs/ids' to be provided");
}
builder.field(MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName(), likeText);
if (percentTermsToMatch != -1) {
@ -240,6 +330,15 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
if (queryName != null) {
builder.field("_name", queryName);
}
if (!ids.isEmpty()) {
builder.array("ids", ids.toArray());
}
if (!docs.isEmpty()) {
builder.array("docs", docs.toArray());
}
if (exclude != null) {
builder.field("exclude", exclude);
}
builder.endObject();
}
}

View File

@ -22,19 +22,26 @@ package org.elasticsearch.index.query;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.*;
/**
*
@ -42,8 +49,8 @@ import java.util.Set;
public class MoreLikeThisQueryParser implements QueryParser {
public static final String NAME = "mlt";
private MoreLikeThisFetchService fetchService = null;
public static class Fields {
public static final ParseField LIKE_TEXT = new ParseField("like_text");
public static final ParseField MIN_TERM_FREQ = new ParseField("min_term_freq");
@ -56,10 +63,18 @@ public class MoreLikeThisQueryParser implements QueryParser {
public static final ParseField PERCENT_TERMS_TO_MATCH = new ParseField("percent_terms_to_match");
public static final ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field");
public static final ParseField STOP_WORDS = new ParseField("stop_words");
}
public static final ParseField DOCUMENT_IDS = new ParseField("ids");
public static final ParseField DOCUMENTS = new ParseField("docs");
public static final ParseField EXCLUDE = new ParseField("exclude");
}
@Inject
public MoreLikeThisQueryParser() {
}
/**
 * Optionally injected fetch service used to retrieve the texts of the
 * documents referenced by ids/docs; stays null when not bound.
 */
@Inject(optional = true)
public void setFetchService(@Nullable MoreLikeThisFetchService fetchService) {
this.fetchService = fetchService;
}
@Override
@ -77,9 +92,11 @@ public class MoreLikeThisQueryParser implements QueryParser {
List<String> moreLikeFields = null;
boolean failOnUnsupportedField = true;
String queryName = null;
boolean exclude = true;
XContentParser.Token token;
String currentFieldName = null;
List<MultiGetRequest.Item> items = new ArrayList<MultiGetRequest.Item>();
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
@ -114,10 +131,12 @@ public class MoreLikeThisQueryParser implements QueryParser {
failOnUnsupportedField = parser.booleanValue();
} else if ("_name".equals(currentFieldName)) {
queryName = parser.text();
} else if (Fields.EXCLUDE.match(currentFieldName, parseContext.parseFlags())) {
exclude = parser.booleanValue();
} else {
throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
}
} else if (token == XContentParser.Token.START_ARRAY) {
} else if (token == XContentParser.Token.START_ARRAY) {
if (Fields.STOP_WORDS.match(currentFieldName, parseContext.parseFlags())) {
Set<String> stopWords = Sets.newHashSet();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
@ -129,14 +148,18 @@ public class MoreLikeThisQueryParser implements QueryParser {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
moreLikeFields.add(parseContext.indexName(parser.text()));
}
} else if (Fields.DOCUMENT_IDS.match(currentFieldName, parseContext.parseFlags())) {
MultiGetRequest.parseIds(parser, items);
} else if (Fields.DOCUMENTS.match(currentFieldName, parseContext.parseFlags())) {
MultiGetRequest.parseDocuments(parser, items);
} else {
throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
}
}
}
if (mltQuery.getLikeText() == null) {
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'like_text' to be specified");
if ((mltQuery.getLikeText() == null && items.isEmpty()) || (mltQuery.getLikeText() != null && !items.isEmpty())) {
throw new QueryParsingException(parseContext.index(), "more_like_this requires either 'like_text' or 'ids/docs' to be specified");
}
if (analyzer == null) {
@ -150,6 +173,75 @@ public class MoreLikeThisQueryParser implements QueryParser {
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be non-empty");
}
removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField);
if (moreLikeFields.isEmpty()) {
return null;
}
mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
if (queryName != null) {
parseContext.addNamedQuery(queryName, mltQuery);
}
if (!items.isEmpty()) {
// set default index, type and fields if not specified
for (MultiGetRequest.Item item : items) {
if (item.index() == null) {
item.index(parseContext.index().name());
}
if (item.type() == null) {
if (parseContext.queryTypes().size() > 1) {
throw new QueryParsingException(parseContext.index(),
"ambiguous type for item with id: " + item.id() + " and index: " + item.index());
} else {
item.type(parseContext.queryTypes().iterator().next());
}
}
if (item.fields() == null && item.fetchSourceContext() == null) {
item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
} else {
// TODO how about fields content fetched from _source?
removeUnsupportedFields(item, analyzer, failOnUnsupportedField);
}
}
// fetching the items with multi-get
List<MoreLikeThisFetchService.LikeText> likeTexts = fetchService.fetch(items);
// right now we are just building a boolean query
BooleanQuery boolQuery = new BooleanQuery();
for (MoreLikeThisFetchService.LikeText likeText : likeTexts) {
addMoreLikeThis(boolQuery, mltQuery, likeText.field, likeText.text);
}
// exclude the items from the search
if (exclude) {
TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, Uid.createUids(items));
ConstantScoreQuery query = new ConstantScoreQuery(filter);
boolQuery.add(query, BooleanClause.Occur.MUST_NOT);
}
return boolQuery;
}
return mltQuery;
}
/**
 * Adds a SHOULD clause to {@code boolQuery} that runs a more-like-this query
 * over a single (field, text) pair, copying every tuning setting from the
 * template {@code mltQuery}.
 */
private void addMoreLikeThis(BooleanQuery boolQuery, MoreLikeThisQuery mltQuery, String fieldName, String likeText) {
    MoreLikeThisQuery perField = new MoreLikeThisQuery();
    // the per-field query targets exactly one field and one like-text
    perField.setMoreLikeFields(new String[] {fieldName});
    perField.setLikeText(likeText);
    // copy all tuning knobs from the template query
    perField.setAnalyzer(mltQuery.getAnalyzer());
    perField.setPercentTermsToMatch(mltQuery.getPercentTermsToMatch());
    perField.setMinTermFrequency(mltQuery.getMinTermFrequency());
    perField.setMaxQueryTerms(mltQuery.getMaxQueryTerms());
    perField.setStopWords(mltQuery.getStopWords());
    perField.setBoostTerms(mltQuery.isBoostTerms());
    perField.setBoostTermsFactor(mltQuery.getBoostTermsFactor());
    perField.setMinDocFreq(mltQuery.getMinDocFreq());
    perField.setMaxDocFreq(mltQuery.getMaxDocFreq());
    perField.setMinWordLen(mltQuery.getMinWordLen());
    perField.setMaxWordLen(mltQuery.getMaxWordLen());
    boolQuery.add(perField, BooleanClause.Occur.SHOULD);
}
private List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
for (Iterator<String> it = moreLikeFields.iterator(); it.hasNext(); ) {
final String fieldName = it.next();
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
@ -160,13 +252,11 @@ public class MoreLikeThisQueryParser implements QueryParser {
}
}
}
if (moreLikeFields.isEmpty()) {
return null;
}
mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
if (queryName != null) {
parseContext.addNamedQuery(queryName, mltQuery);
}
return mltQuery;
return moreLikeFields;
}
/**
 * Filters the item's fields down to those supported by more-like-this
 * (fields whose analyzer produces character token streams), delegating to
 * {@link #removeUnsupportedFields(List, Analyzer, boolean)}.
 *
 * @throws IOException if analysis of a field fails
 */
private void removeUnsupportedFields(MultiGetRequest.Item item, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
    // Arrays.asList is a fixed-size view: the delegate removes entries via its
    // iterator, which would throw UnsupportedOperationException — copy first.
    List<String> supportedFields =
            removeUnsupportedFields(new ArrayList<>(Arrays.asList(item.fields())), analyzer, failOnUnsupportedField);
    // Use the typed toArray overload: the old (String[]) cast of the no-arg
    // toArray() (which returns Object[]) threw ClassCastException at runtime.
    item.fields(supportedFields.toArray(new String[supportedFields.size()]));
}
}

View File

@ -0,0 +1,83 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.search.morelikethis;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.get.MultiGetItemResponse;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.action.get.MultiGetResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.get.GetField;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
*
*/
/**
 * Fetches the documents referenced by a more-like-this query via a single
 * multi-get call and flattens their field values into (field, text) pairs.
 */
public class MoreLikeThisFetchService extends AbstractComponent {

    /** An immutable (field name, field text) pair extracted from a fetched document. */
    public static final class LikeText {

        public final String field;
        public final String text;

        public LikeText(String field, String text) {
            this.field = field;
            this.text = text;
        }
    }

    private final Client client;

    @Inject
    public MoreLikeThisFetchService(Client client, Settings settings) {
        super(settings);
        this.client = client;
    }

    /**
     * Executes a multi-get for all given items and returns one LikeText per
     * field value of every document found. Failed responses and missing
     * documents are skipped silently (best-effort semantics).
     */
    public List<LikeText> fetch(List<MultiGetRequest.Item> items) throws IOException {
        MultiGetRequest request = new MultiGetRequest();
        for (MultiGetRequest.Item item : items) {
            request.add(item);
        }
        List<LikeText> likeTexts = new ArrayList<>();
        for (MultiGetItemResponse response : client.multiGet(request).actionGet()) {
            if (response.isFailed()) {
                continue; // skip items that could not be fetched
            }
            GetResponse getResponse = response.getResponse();
            if (!getResponse.isExists()) {
                continue; // document deleted or never existed
            }
            for (GetField getField : getResponse.getFields().values()) {
                for (Object value : getField.getValues()) {
                    likeTexts.add(new LikeText(getField.getName(), value.toString()));
                }
            }
        }
        return likeTexts;
    }
}

View File

@ -24,6 +24,7 @@ import org.elasticsearch.common.inject.AbstractModule;
import org.elasticsearch.common.inject.Module;
import org.elasticsearch.common.inject.SpawnModules;
import org.elasticsearch.index.query.functionscore.FunctionScoreModule;
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
import org.elasticsearch.search.action.SearchServiceTransportAction;
import org.elasticsearch.search.aggregations.AggregationModule;
import org.elasticsearch.search.controller.SearchPhaseController;
@ -70,5 +71,6 @@ public class SearchModule extends AbstractModule implements SpawnModules {
bind(HighlightPhase.class).asEagerSingleton();
bind(SearchServiceTransportAction.class).asEagerSingleton();
bind(MoreLikeThisFetchService.class).asEagerSingleton();
}
}

View File

@ -27,6 +27,7 @@ import org.elasticsearch.common.io.stream.Streamable;
import org.elasticsearch.rest.RestRequest;
import java.io.IOException;
import java.util.Arrays;
/**
*/
@ -162,4 +163,26 @@ public class FetchSourceContext implements Streamable {
out.writeStringArray(includes);
out.writeStringArray(excludes);
}
@Override
public boolean equals(Object o) {
    // Identity short-circuit.
    if (this == o) {
        return true;
    }
    // Exact-class comparison, matching the original contract.
    if (o == null || getClass() != o.getClass()) {
        return false;
    }
    FetchSourceContext other = (FetchSourceContext) o;
    return fetchSource == other.fetchSource
            && Arrays.equals(includes, other.includes)
            && Arrays.equals(excludes, other.excludes);
}
@Override
public int hashCode() {
    // Arrays.hashCode(null) is specified to return 0, so the explicit
    // null checks the original carried were redundant.
    int result = fetchSource ? 1 : 0;
    result = 31 * result + Arrays.hashCode(includes);
    result = 31 * result + Arrays.hashCode(excludes);
    return result;
}
}

View File

@ -0,0 +1,168 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import org.elasticsearch.search.fetch.source.FetchSourceContext;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import java.io.IOException;
import java.util.List;
import java.util.Random;
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
import static org.hamcrest.Matchers.is;
/**
 * Round-trip tests for {@link MoreLikeThisQueryBuilder.Item}: an item rendered
 * to JSON must parse back into an equal {@link MultiGetRequest.Item}.
 */
public class ItemSerializationTests extends ElasticsearchTestCase {

    /** Generates a random string array; may be null to cover the "no fields" case. */
    private String[] generateRandomStringArray(int arraySize, int stringSize) {
        String[] array = randomBoolean() ? new String[randomInt(arraySize)] : null; // allow empty arrays
        if (array != null) {
            for (int i = 0; i < array.length; i++) {
                array[i] = randomAsciiOfLength(stringSize);
            }
        }
        return array;
    }

    /** Builds an {@link Item} with randomized coordinates, fields, version and fetch-source context. */
    private Item generateRandomItem(int arraySize, int stringSize) {
        String index = randomAsciiOfLength(stringSize);
        String type = randomAsciiOfLength(stringSize);
        String id = String.valueOf(Math.abs(randomInt()));
        String routing = randomBoolean() ? randomAsciiOfLength(stringSize) : null;
        String[] fields = generateRandomStringArray(arraySize, stringSize);
        long version = Math.abs(randomLong());
        // NOTE(review): an unseeded Random bypasses the framework's reproducible
        // randomness; consider wiring in the test's random source instead.
        VersionType versionType = RandomPicks.randomFrom(new Random(), VersionType.values());
        FetchSourceContext fetchSourceContext;
        // Cover all fetch-source shapes: boolean flag, includes only,
        // includes+excludes, and absent.
        switch (randomIntBetween(0, 3)) {
            case 0 :
                fetchSourceContext = new FetchSourceContext(randomBoolean());
                break;
            case 1 :
                fetchSourceContext = new FetchSourceContext(generateRandomStringArray(arraySize, stringSize));
                break;
            case 2 :
                fetchSourceContext = new FetchSourceContext(generateRandomStringArray(arraySize, stringSize),
                        generateRandomStringArray(arraySize, stringSize));
                break;
            default:
                fetchSourceContext = null;
                break;
        }
        // The fluent setters return MultiGetRequest.Item, hence the cast back to Item.
        return (Item) new Item(index, type, id).routing(routing).fields(fields).version(version).versionType(versionType)
                .fetchSourceContext(fetchSourceContext);
    }

    /** Serializes a single item into a {@code {"docs": [...]}} JSON document. */
    private String itemToJson(Item item) throws IOException {
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        builder.startArray("docs");
        item.toXContent(builder, ToXContent.EMPTY_PARAMS);
        builder.endArray();
        builder.endObject();
        return XContentHelper.convertToJson(builder.bytes(), false);
    }

    /** Parses a {@code {"docs": [...]}} JSON document back into its first item. */
    private MultiGetRequest.Item jsonToItem(String json) throws Exception {
        MultiGetRequest request = new MultiGetRequest().add(null, null, null, null, new BytesArray(json), true);
        return request.getItems().get(0);
    }

    @Test
    public void testItemSerialization() throws Exception {
        int numOfTrials = 100;
        int maxArraySize = 7;
        int maxStringSize = 8;
        for (int i = 0; i < numOfTrials; i++) {
            Item item1 = generateRandomItem(maxArraySize, maxStringSize);
            String json = itemToJson(item1);
            MultiGetRequest.Item item2 = jsonToItem(json);
            assertEquals(item1, item2);
        }
    }

    /** Parses the given JSON and asserts it yields the three well-known fixture items. */
    private List<MultiGetRequest.Item> testItemsFromJSON(String json) throws Exception {
        MultiGetRequest request = new MultiGetRequest();
        request.add(null, null, null, null, new BytesArray(json), true);
        List<MultiGetRequest.Item> items = request.getItems();
        // JUnit convention: expected value first.
        assertEquals(3, items.size());
        for (MultiGetRequest.Item item : items) {
            assertThat(item.index(), is("test"));
            assertThat(item.type(), is("type"));
            FetchSourceContext fetchSource = item.fetchSourceContext();
            switch (item.id()) {
                case "1" :
                    assertThat(fetchSource.fetchSource(), is(false));
                    break;
                case "2" :
                    assertThat(fetchSource.fetchSource(), is(true));
                    assertThat(fetchSource.includes(), is(new String[]{"field3", "field4"}));
                    break;
                case "3" :
                    assertThat(fetchSource.fetchSource(), is(true));
                    assertThat(fetchSource.includes(), is(new String[]{"user"}));
                    assertThat(fetchSource.excludes(), is(new String[]{"user.location"}));
                    break;
                default:
                    fail("item with id: " + item.id() + " is not 1, 2 or 3");
                    break;
            }
        }
        return items;
    }

    @Test
    public void testSimpleItemSerializationFromFile() throws Exception {
        // test items from JSON
        List<MultiGetRequest.Item> itemsFromJSON = testItemsFromJSON(
                copyToStringFromClasspath("/org/elasticsearch/index/query/items.json"));

        // create builder from items
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        builder.startArray("docs");
        for (MultiGetRequest.Item item : itemsFromJSON) {
            MoreLikeThisQueryBuilder.Item itemForBuilder = (MoreLikeThisQueryBuilder.Item) new MoreLikeThisQueryBuilder.Item(
                    item.index(), item.type(), item.id())
                    .fetchSourceContext(item.fetchSourceContext())
                    .fields(item.fields());
            itemForBuilder.toXContent(builder, ToXContent.EMPTY_PARAMS);
        }
        builder.endArray();
        builder.endObject();

        // verify generated JSON lead to the same items
        String json = XContentHelper.convertToJson(builder.bytes(), false);
        testItemsFromJSON(json);
    }
}

View File

@ -30,6 +30,7 @@ import org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.cache.recycler.CacheRecyclerModule;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.common.bytes.BytesArray;
@ -63,6 +64,7 @@ import org.elasticsearch.index.search.NumericRangeFieldDataFilter;
import org.elasticsearch.index.search.geo.GeoDistanceFilter;
import org.elasticsearch.index.search.geo.GeoPolygonFilter;
import org.elasticsearch.index.search.geo.InMemoryGeoBoundingBoxFilter;
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.index.similarity.SimilarityModule;
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
@ -80,6 +82,7 @@ import org.junit.Test;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@ -137,6 +140,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase {
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/query/mapping.json");
injector.getInstance(MapperService.class).merge("person", new CompressedString(mapping), true);
injector.getInstance(MapperService.class).documentMapper("person").parse(new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/index/query/data.json")));
queryParser = injector.getInstance(IndexQueryParserService.class);
}
@ -1671,6 +1675,58 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase {
assertThat(mltQuery.getMaxQueryTerms(), equalTo(12));
}
@Test
public void testMoreLikeThisIds() throws Exception {
    // Swap the real fetch service for a mock that fabricates like-text from
    // the item coordinates, so no cluster round-trip is needed.
    MoreLikeThisQueryParser parser = (MoreLikeThisQueryParser) queryParser.queryParser("more_like_this");
    parser.setFetchService(new MockMoreLikeThisFetchService());

    // Build the expected like-texts: the fixture references four documents
    // (two via "docs", ids 1-2, and two via "ids", 3-4), each expanded over
    // the two requested fields — mirroring what the mock service produces.
    List<MoreLikeThisFetchService.LikeText> likeTexts = new ArrayList<>();
    String index = "test";
    String type = "person";
    for (int i = 1; i < 5; i++) {
        for (String field : new String[]{"name.first", "name.last"}) {
            MoreLikeThisFetchService.LikeText likeText = new MoreLikeThisFetchService.LikeText(
                    field, index + " " + type + " " + i + " " + field);
            likeTexts.add(likeText);
        }
    }

    // NOTE(review): this local shadows the class field of the same name that
    // was configured above; kept as-is to match sibling tests in this file.
    IndexQueryParserService queryParser = queryParser();
    String query = copyToStringFromClasspath("/org/elasticsearch/index/query/mlt-ids.json");
    Query parsedQuery = queryParser.parse(query).query();

    // The multi-item query is rewritten into a boolean query holding one
    // SHOULD-clause MoreLikeThisQuery per (document, field) pair, in order.
    assertThat(parsedQuery, instanceOf(BooleanQuery.class));
    BooleanQuery booleanQuery = (BooleanQuery) parsedQuery;
    assertThat(booleanQuery.getClauses().length, is(likeTexts.size()));
    for (int i=0; i<likeTexts.size(); i++) {
        BooleanClause booleanClause = booleanQuery.getClauses()[i];
        assertThat(booleanClause.getOccur(), is(BooleanClause.Occur.SHOULD));
        assertThat(booleanClause.getQuery(), instanceOf(MoreLikeThisQuery.class));
        MoreLikeThisQuery mltQuery = (MoreLikeThisQuery) booleanClause.getQuery();
        assertThat(mltQuery.getLikeText(), is(likeTexts.get(i).text));
        assertThat(mltQuery.getMoreLikeFields()[0], equalTo(likeTexts.get(i).field));
    }
}
private static class MockMoreLikeThisFetchService extends MoreLikeThisFetchService {
public MockMoreLikeThisFetchService() {
super(null, ImmutableSettings.Builder.EMPTY_SETTINGS);
}
public List<LikeText> fetch(List<MultiGetRequest.Item> items) throws IOException {
List<LikeText> likeTexts = new ArrayList<>();
for (MultiGetRequest.Item item: items) {
for (String field : item.fields()) {
LikeText likeText = new LikeText(
field, item.index() + " " + item.type() + " " + item.id() + " " + field);
likeTexts.add(likeText);
}
}
return likeTexts;
}
}
@Test
public void testFuzzyLikeThisBuilder() throws Exception {
IndexQueryParserService queryParser = queryParser();

View File

@ -0,0 +1,25 @@
{
"docs" : [
{
"_index" : "test",
"_type" : "type",
"_id" : "1",
"_source" : false
},
{
"_index" : "test",
"_type" : "type",
"_id" : "2",
"_source" : ["field3", "field4"]
},
{
"_index" : "test",
"_type" : "type",
"_id" : "3",
"_source" : {
"include": ["user"],
"exclude": ["user.location"]
}
}
]
}

View File

@ -0,0 +1,21 @@
{
    "more_like_this" : {
"fields" : ["name.first", "name.last"],
"docs" : [
{
"_index" : "test",
"_type" : "person",
"_id" : "1"
},
{
"_index" : "test",
"_type" : "person",
"_id" : "2"
}
],
"ids" : ["3", "4"],
"exclude" : false,
"min_term_freq" : 1,
"max_query_terms" : 12
}
}

View File

@ -20,12 +20,18 @@
package org.elasticsearch.mlt;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.mlt.MoreLikeThisRequest;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Test;
@ -341,4 +347,118 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
assertEquals(mltResponse.getHits().hits().length, 8);
}
/**
 * A more-like-this query referencing document "1" by id (with exclude(false),
 * i.e. the source document itself may match) should hit all three documents.
 */
// NOTE(review): sibling tests carry @Test; without it JUnit 4 never runs this method.
@Test
public void testSimpleMoreLikeThisIds() throws Exception {
    logger.info("Creating index test");
    assertAcked(prepareCreate("test").addMapping("type1",
            jsonBuilder().startObject().startObject("type1").startObject("properties")
                    .startObject("text").field("type", "string").endObject()
                    .endObject().endObject().endObject()));

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    List<IndexRequestBuilder> builders = new ArrayList<>();
    builders.add(client().prepareIndex("test", "type1").setSource("text", "lucene").setId("1"));
    builders.add(client().prepareIndex("test", "type1").setSource("text", "lucene release").setId("2"));
    builders.add(client().prepareIndex("test", "type1").setSource("text", "apache lucene").setId("3"));
    indexRandom(true, builders);

    logger.info("Running MoreLikeThis");
    MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").ids("1").exclude(false).minTermFreq(1).minDocFreq(1);
    SearchResponse mltResponse = client().prepareSearch().setTypes("type1").setQuery(queryBuilder).execute().actionGet();
    assertHitCount(mltResponse, 3l);
}
@Test
public void testCompareMoreLikeThisDSLWithAPI() throws Exception {
    // A more-like-this query with a single document id must return the same
    // hits, in the same order and with the same scores, as the dedicated
    // More Like This API run against that document.
    logger.info("Creating index test");
    assertAcked(prepareCreate("test").addMapping("type1",
            jsonBuilder().startObject().startObject("type1").startObject("properties")
                    .startObject("text").field("type", "string").endObject()
                    .endObject().endObject().endObject()));

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    String[] texts = new String[] {
            "Apache Lucene",
            "free and open source",
            "information retrieval",
            "software library",
            "programmed in Java",
            "Doug Cutting",
            "Apache Software Foundation",
            "Apache Software License",
            "Lucene programming languages",
            "Delphi, Perl, C#, C++, Python, Ruby, and PHP"
    };
    List<IndexRequestBuilder> builders = new ArrayList<>(10);
    for (int i = 0; i < texts.length; i++) {
        builders.add(client().prepareIndex("test", "type1").setSource("text", texts[i]).setId(String.valueOf(i)));
    }
    indexRandom(true, builders);

    logger.info("Running MoreLikeThis DSL with IDs");
    Client client = client();
    MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").ids("0").minTermFreq(1).minDocFreq(1);
    // QUERY_THEN_FETCH pins the search type so scoring is comparable with the API call below.
    SearchResponse mltResponseDSL = client.prepareSearch()
            .setSearchType(SearchType.QUERY_THEN_FETCH)
            .setTypes("type1")
            .setQuery(queryBuilder)
            .execute().actionGet();
    assertSearchResponse(mltResponseDSL);

    logger.info("Running MoreLikeThis API");
    MoreLikeThisRequest mltRequest = moreLikeThisRequest("test").type("type1").id("0").minTermFreq(1).minDocFreq(1);
    SearchResponse mltResponseAPI = client.moreLikeThis(mltRequest).actionGet();
    assertSearchResponse(mltResponseAPI);

    // Compare hit-by-hit: same document ids and identical scores at each rank.
    logger.info("Ensure the documents and scores returned are the same.");
    SearchHit[] hitsDSL = mltResponseDSL.getHits().hits();
    SearchHit[] hitsAPI = mltResponseAPI.getHits().hits();
    assertThat("Not the same number of results.", hitsAPI.length, equalTo(hitsDSL.length));
    for (int i = 0; i < hitsDSL.length; i++) {
        assertThat("Expected id: " + hitsDSL[i].getId() + " at position " + i + " but wasn't.",
                hitsAPI[i].getId(), equalTo(hitsDSL[i].getId()));
        assertThat("Expected score: " + hitsDSL[i].getScore() + " at position " + i + " but wasn't.",
                hitsAPI[i].getScore(), equalTo(hitsDSL[i].getScore()));
    }
}
@Test
public void testSimpleMoreLikeThisIdsMultipleTypes() throws Exception {
    // One document per type, each containing the shared term "lucene": a
    // more-like-this item on type0/doc0 searched across all types (with
    // exclude(false), so the source document itself may match) should hit
    // exactly one document per type.
    logger.info("Creating index test");
    int numOfTypes = randomIntBetween(2, 10);
    CreateIndexRequestBuilder createRequestBuilder = prepareCreate("test");
    for (int i = 0; i < numOfTypes; i++) {
        createRequestBuilder.addMapping("type" + i, jsonBuilder().startObject().startObject("type" + i).startObject("properties")
                .startObject("text").field("type", "string").endObject()
                .endObject().endObject().endObject());
    }
    assertAcked(createRequestBuilder);

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    List<IndexRequestBuilder> builders = new ArrayList<>(numOfTypes);
    for (int i = 0; i < numOfTypes; i++) {
        builders.add(client().prepareIndex("test", "type" + i).setSource("text", "lucene" + " " + i).setId(String.valueOf(i)));
    }
    indexRandom(true, builders);

    logger.info("Running MoreLikeThis");
    MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").exclude(false).minTermFreq(1).minDocFreq(1)
            .addItem(new MoreLikeThisQueryBuilder.Item("test", "type0", "0"));

    String[] types = new String[numOfTypes];
    for (int i = 0; i < numOfTypes; i++) {
        types[i] = "type"+i;
    }
    SearchResponse mltResponse = client().prepareSearch().setTypes(types).setQuery(queryBuilder).execute().actionGet();
    assertHitCount(mltResponse, numOfTypes);
}
}