More Like This Query: added support for searching over multiple items.
The syntax to specify one or more items is the same as for the Multi GET API. If only one document is specified, the results returned are the same as when using the More Like This API. Relates to #4075. Closes #5857.
This commit is contained in:
parent
a3581959d7
commit
db991dc3a4
|
@ -106,7 +106,7 @@ curl 'localhost:9200/_mget' -d '{
|
|||
"_id" : "3",
|
||||
"_source" : {
|
||||
"include": ["user"],
|
||||
"_exclude": ["user.location"]
|
||||
"exclude": ["user.location"]
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
|
@ -16,6 +16,37 @@ running it against one or more fields.
|
|||
}
|
||||
--------------------------------------------------
|
||||
|
||||
Additionally, More Like This can find documents that are "like" a set of
|
||||
chosen documents. The syntax to specify one or more documents is similar to
|
||||
the <<docs-multi-get,Multi GET API>>, and supports the `ids` or `docs` array.
|
||||
If only one document is specified, the query behaves the same as the
|
||||
<<search-more-like-this,More Like This API>>.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"more_like_this" : {
|
||||
"fields" : ["name.first", "name.last"],
|
||||
"docs" : [
|
||||
{
|
||||
"_index" : "test",
|
||||
"_type" : "type",
|
||||
"_id" : "1"
|
||||
},
|
||||
{
|
||||
"_index" : "test",
|
||||
"_type" : "type",
|
||||
"_id" : "2"
|
||||
}
|
||||
],
|
||||
"ids" : ["3", "4"],
|
||||
"min_term_freq" : 1,
|
||||
"max_query_terms" : 12
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
||||
`more_like_this` can be shortened to `mlt`.
|
||||
|
||||
Under the hood, `more_like_this` simply creates multiple `should` clauses in a `bool` query of
|
||||
|
@ -31,6 +62,10 @@ terms should be considered as interesting. In order to give more weight to
|
|||
more interesting terms, each boolean clause associated with a term could be
|
||||
boosted by the term tf-idf score times some boosting factor `boost_terms`.
|
||||
|
||||
When a search for multiple `docs` is issued, More Like This generates a
|
||||
`more_like_this` query per document field in `fields`. These `fields` are
|
||||
specified as a top level parameter or within each `doc`.
|
||||
|
||||
The `more_like_this` top level parameters include:
|
||||
|
||||
[cols="<,<",options="header",]
|
||||
|
@ -39,7 +74,16 @@ The `more_like_this` top level parameters include:
|
|||
|`fields` |A list of the fields to run the more like this query against.
|
||||
Defaults to the `_all` field.
|
||||
|
||||
|`like_text` |The text to find documents like it, *required*.
|
||||
|`like_text` |The text to find documents like it, *required* if `ids` is
|
||||
not specified.
|
||||
|
||||
|`ids` or `docs` |A list of documents following the same syntax as the
|
||||
<<docs-multi-get,Multi GET API>>. This parameter is *required* if
|
||||
`like_text` is not specified. The texts are fetched from `fields` unless
|
||||
specified in each `doc`, and cannot be set to `_all`.
|
||||
|
||||
|`exclude` |When using `ids` or `docs`, specifies whether the matched
documents should be excluded from the search results. Defaults to `true`.
|
||||
|
||||
|`percent_terms_to_match` |The percentage of terms to match on (float
|
||||
value). Defaults to `0.3` (30 percent).
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.elasticsearch.search.fetch.source.FetchSourceContext;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -58,7 +59,7 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
|
|||
private VersionType versionType = VersionType.INTERNAL;
|
||||
private FetchSourceContext fetchSourceContext;
|
||||
|
||||
Item() {
|
||||
public Item() {
|
||||
|
||||
}
|
||||
|
||||
|
@ -88,6 +89,11 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
|
|||
return this.type;
|
||||
}
|
||||
|
||||
public Item type(String type) {
|
||||
this.type = type;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String id() {
|
||||
return this.id;
|
||||
}
|
||||
|
@ -195,6 +201,39 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
|
|||
|
||||
FetchSourceContext.optionalWriteToStream(fetchSourceContext, out);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (!(o instanceof Item)) return false;
|
||||
|
||||
Item item = (Item) o;
|
||||
|
||||
if (version != item.version) return false;
|
||||
if (fetchSourceContext != null ? !fetchSourceContext.equals(item.fetchSourceContext) : item.fetchSourceContext != null)
|
||||
return false;
|
||||
if (!Arrays.equals(fields, item.fields)) return false;
|
||||
if (!id.equals(item.id)) return false;
|
||||
if (!index.equals(item.index)) return false;
|
||||
if (routing != null ? !routing.equals(item.routing) : item.routing != null) return false;
|
||||
if (type != null ? !type.equals(item.type) : item.type != null) return false;
|
||||
if (versionType != item.versionType) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = index.hashCode();
|
||||
result = 31 * result + (type != null ? type.hashCode() : 0);
|
||||
result = 31 * result + id.hashCode();
|
||||
result = 31 * result + (routing != null ? routing.hashCode() : 0);
|
||||
result = 31 * result + (fields != null ? Arrays.hashCode(fields) : 0);
|
||||
result = 31 * result + (int) (version ^ (version >>> 32));
|
||||
result = 31 * result + versionType.hashCode();
|
||||
result = 31 * result + (fetchSourceContext != null ? fetchSourceContext.hashCode() : 0);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean listenerThreaded = false;
|
||||
|
@ -205,6 +244,10 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
|
|||
|
||||
List<Item> items = new ArrayList<>();
|
||||
|
||||
public List<Item> getItems() {
|
||||
return this.items;
|
||||
}
|
||||
|
||||
public MultiGetRequest add(Item item) {
|
||||
items.add(item);
|
||||
return this;
|
||||
|
@ -287,115 +330,9 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
|
|||
currentFieldName = parser.currentName();
|
||||
} else if (token == XContentParser.Token.START_ARRAY) {
|
||||
if ("docs".equals(currentFieldName)) {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (token != XContentParser.Token.START_OBJECT) {
|
||||
throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
|
||||
}
|
||||
String index = defaultIndex;
|
||||
String type = defaultType;
|
||||
String id = null;
|
||||
String routing = defaultRouting;
|
||||
String parent = null;
|
||||
List<String> fields = null;
|
||||
long version = Versions.MATCH_ANY;
|
||||
VersionType versionType = VersionType.INTERNAL;
|
||||
|
||||
FetchSourceContext fetchSourceContext = null;
|
||||
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
currentFieldName = parser.currentName();
|
||||
} else if (token.isValue()) {
|
||||
if ("_index".equals(currentFieldName)) {
|
||||
if (!allowExplicitIndex) {
|
||||
throw new ElasticsearchIllegalArgumentException("explicit index in multi get is not allowed");
|
||||
}
|
||||
index = parser.text();
|
||||
} else if ("_type".equals(currentFieldName)) {
|
||||
type = parser.text();
|
||||
} else if ("_id".equals(currentFieldName)) {
|
||||
id = parser.text();
|
||||
} else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) {
|
||||
routing = parser.text();
|
||||
} else if ("_parent".equals(currentFieldName) || "parent".equals(currentFieldName)) {
|
||||
parent = parser.text();
|
||||
} else if ("fields".equals(currentFieldName)) {
|
||||
fields = new ArrayList<>();
|
||||
fields.add(parser.text());
|
||||
} else if ("_version".equals(currentFieldName) || "version".equals(currentFieldName)) {
|
||||
version = parser.longValue();
|
||||
} else if ("_version_type".equals(currentFieldName) || "_versionType".equals(currentFieldName) || "version_type".equals(currentFieldName) || "versionType".equals(currentFieldName)) {
|
||||
versionType = VersionType.fromString(parser.text());
|
||||
} else if ("_source".equals(currentFieldName)) {
|
||||
if (parser.isBooleanValue()) {
|
||||
fetchSourceContext = new FetchSourceContext(parser.booleanValue());
|
||||
} else if (token == XContentParser.Token.VALUE_STRING) {
|
||||
fetchSourceContext = new FetchSourceContext(new String[]{parser.text()});
|
||||
} else {
|
||||
throw new ElasticsearchParseException("illegal type for _source: [" + token + "]");
|
||||
}
|
||||
}
|
||||
} else if (token == XContentParser.Token.START_ARRAY) {
|
||||
if ("fields".equals(currentFieldName)) {
|
||||
fields = new ArrayList<>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
fields.add(parser.text());
|
||||
}
|
||||
} else if ("_source".equals(currentFieldName)) {
|
||||
ArrayList<String> includes = new ArrayList<>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
includes.add(parser.text());
|
||||
}
|
||||
fetchSourceContext = new FetchSourceContext(includes.toArray(Strings.EMPTY_ARRAY));
|
||||
}
|
||||
|
||||
} else if (token == XContentParser.Token.START_OBJECT) {
|
||||
if ("_source".equals(currentFieldName)) {
|
||||
List<String> currentList = null, includes = null, excludes = null;
|
||||
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
currentFieldName = parser.currentName();
|
||||
if ("includes".equals(currentFieldName) || "include".equals(currentFieldName)) {
|
||||
currentList = includes != null ? includes : (includes = new ArrayList<>(2));
|
||||
} else if ("excludes".equals(currentFieldName) || "exclude".equals(currentFieldName)) {
|
||||
currentList = excludes != null ? excludes : (excludes = new ArrayList<>(2));
|
||||
} else {
|
||||
throw new ElasticsearchParseException("Source definition may not contain " + parser.text());
|
||||
}
|
||||
} else if (token == XContentParser.Token.START_ARRAY) {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
currentList.add(parser.text());
|
||||
}
|
||||
} else if (token.isValue()) {
|
||||
currentList.add(parser.text());
|
||||
} else {
|
||||
throw new ElasticsearchParseException("unexpected token while parsing source settings");
|
||||
}
|
||||
}
|
||||
|
||||
fetchSourceContext = new FetchSourceContext(
|
||||
includes == null ? Strings.EMPTY_ARRAY : includes.toArray(new String[includes.size()]),
|
||||
excludes == null ? Strings.EMPTY_ARRAY : excludes.toArray(new String[excludes.size()]));
|
||||
}
|
||||
}
|
||||
}
|
||||
String[] aFields;
|
||||
if (fields != null) {
|
||||
aFields = fields.toArray(new String[fields.size()]);
|
||||
} else {
|
||||
aFields = defaultFields;
|
||||
}
|
||||
add(new Item(index, type, id).routing(routing).fields(aFields).parent(parent).version(version).versionType(versionType)
|
||||
.fetchSourceContext(fetchSourceContext == null ? defaultFetchSource : fetchSourceContext));
|
||||
}
|
||||
parseDocuments(parser, this.items, defaultIndex, defaultType, defaultFields, defaultFetchSource, defaultRouting, allowExplicitIndex);
|
||||
} else if ("ids".equals(currentFieldName)) {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (!token.isValue()) {
|
||||
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
|
||||
}
|
||||
add(new Item(defaultIndex, defaultType, parser.text()).fields(defaultFields).fetchSourceContext(defaultFetchSource).routing(defaultRouting));
|
||||
}
|
||||
parseIds(parser, this.items, defaultIndex, defaultType, defaultFields, defaultFetchSource, defaultRouting);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -403,6 +340,131 @@ public class MultiGetRequest extends ActionRequest<MultiGetRequest> implements I
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
 * Parses a "docs" array (Multi GET syntax) and appends one {@link Item} per
 * array element to {@code items}.
 * <p>
 * Each element must be an object; its fields are merged over the supplied
 * defaults ({@code defaultIndex}, {@code defaultType}, {@code defaultFields},
 * {@code defaultFetchSource}, {@code defaultRouting}).
 *
 * @param allowExplicitIndex when false, a per-document "_index" field raises
 *                           an ElasticsearchIllegalArgumentException
 * @throws IOException on parser failure
 */
public static void parseDocuments(XContentParser parser, List<Item> items, @Nullable String defaultIndex, @Nullable String defaultType, @Nullable String[] defaultFields, @Nullable FetchSourceContext defaultFetchSource, @Nullable String defaultRouting, boolean allowExplicitIndex) throws IOException {
    String currentFieldName = null;
    XContentParser.Token token;
    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
        if (token != XContentParser.Token.START_OBJECT) {
            throw new ElasticsearchIllegalArgumentException("docs array element should include an object");
        }
        // Per-item state, seeded with the request-level defaults.
        String index = defaultIndex;
        String type = defaultType;
        String id = null;
        String routing = defaultRouting;
        String parent = null;
        List<String> fields = null;
        long version = Versions.MATCH_ANY;
        VersionType versionType = VersionType.INTERNAL;

        FetchSourceContext fetchSourceContext = null;

        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (token.isValue()) {
                if ("_index".equals(currentFieldName)) {
                    if (!allowExplicitIndex) {
                        throw new ElasticsearchIllegalArgumentException("explicit index in multi get is not allowed");
                    }
                    index = parser.text();
                } else if ("_type".equals(currentFieldName)) {
                    type = parser.text();
                } else if ("_id".equals(currentFieldName)) {
                    id = parser.text();
                } else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) {
                    routing = parser.text();
                } else if ("_parent".equals(currentFieldName) || "parent".equals(currentFieldName)) {
                    parent = parser.text();
                } else if ("fields".equals(currentFieldName)) {
                    // "fields" given as a single scalar value.
                    fields = new ArrayList<>();
                    fields.add(parser.text());
                } else if ("_version".equals(currentFieldName) || "version".equals(currentFieldName)) {
                    version = parser.longValue();
                } else if ("_version_type".equals(currentFieldName) || "_versionType".equals(currentFieldName) || "version_type".equals(currentFieldName) || "versionType".equals(currentFieldName)) {
                    versionType = VersionType.fromString(parser.text());
                } else if ("_source".equals(currentFieldName)) {
                    // "_source" as a scalar: a boolean toggle or a single include pattern.
                    if (parser.isBooleanValue()) {
                        fetchSourceContext = new FetchSourceContext(parser.booleanValue());
                    } else if (token == XContentParser.Token.VALUE_STRING) {
                        fetchSourceContext = new FetchSourceContext(new String[]{parser.text()});
                    } else {
                        throw new ElasticsearchParseException("illegal type for _source: [" + token + "]");
                    }
                }
            } else if (token == XContentParser.Token.START_ARRAY) {
                if ("fields".equals(currentFieldName)) {
                    fields = new ArrayList<>();
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        fields.add(parser.text());
                    }
                } else if ("_source".equals(currentFieldName)) {
                    // "_source" as an array: a list of include patterns.
                    ArrayList<String> includes = new ArrayList<>();
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        includes.add(parser.text());
                    }
                    fetchSourceContext = new FetchSourceContext(includes.toArray(Strings.EMPTY_ARRAY));
                }

            } else if (token == XContentParser.Token.START_OBJECT) {
                if ("_source".equals(currentFieldName)) {
                    // "_source" as an object: explicit include/exclude lists,
                    // each accepted as either an array or a single scalar.
                    List<String> currentList = null, includes = null, excludes = null;

                    while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                        if (token == XContentParser.Token.FIELD_NAME) {
                            currentFieldName = parser.currentName();
                            if ("includes".equals(currentFieldName) || "include".equals(currentFieldName)) {
                                currentList = includes != null ? includes : (includes = new ArrayList<>(2));
                            } else if ("excludes".equals(currentFieldName) || "exclude".equals(currentFieldName)) {
                                currentList = excludes != null ? excludes : (excludes = new ArrayList<>(2));
                            } else {
                                throw new ElasticsearchParseException("Source definition may not contain " + parser.text());
                            }
                        } else if (token == XContentParser.Token.START_ARRAY) {
                            while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                                currentList.add(parser.text());
                            }
                        } else if (token.isValue()) {
                            currentList.add(parser.text());
                        } else {
                            throw new ElasticsearchParseException("unexpected token while parsing source settings");
                        }
                    }

                    fetchSourceContext = new FetchSourceContext(
                            includes == null ? Strings.EMPTY_ARRAY : includes.toArray(new String[includes.size()]),
                            excludes == null ? Strings.EMPTY_ARRAY : excludes.toArray(new String[excludes.size()]));
                }
            }
        }
        // Fall back to the request-level default fields when none were given.
        String[] aFields;
        if (fields != null) {
            aFields = fields.toArray(new String[fields.size()]);
        } else {
            aFields = defaultFields;
        }
        items.add(new Item(index, type, id).routing(routing).fields(aFields).parent(parent).version(version).versionType(versionType)
                .fetchSourceContext(fetchSourceContext == null ? defaultFetchSource : fetchSourceContext));
    }
}
|
||||
|
||||
/**
 * Convenience overload: parses a "docs" array with no defaults and with
 * explicit per-document indices allowed.
 */
public static void parseDocuments(XContentParser parser, List<Item> items) throws IOException {
    parseDocuments(parser, items, null, null, null, null, null, true);
}
|
||||
|
||||
public static void parseIds(XContentParser parser, List<Item> items, @Nullable String defaultIndex, @Nullable String defaultType, @Nullable String[] defaultFields, @Nullable FetchSourceContext defaultFetchSource, @Nullable String defaultRouting) throws IOException {
|
||||
XContentParser.Token token;
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (!token.isValue()) {
|
||||
throw new ElasticsearchIllegalArgumentException("ids array element should only contain ids");
|
||||
}
|
||||
items.add(new Item(defaultIndex, defaultType, parser.text()).fields(defaultFields).fetchSourceContext(defaultFetchSource).routing(defaultRouting));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Convenience overload: parses an "ids" array with no defaults applied.
 */
public static void parseIds(XContentParser parser, List<Item> items) throws IOException {
    parseIds(parser, items, null, null, null, null, null);
}
|
||||
|
||||
@Override
|
||||
public Iterator<Item> iterator() {
|
||||
return Iterators.unmodifiableIterator(items.iterator());
|
||||
|
|
|
@ -35,7 +35,9 @@ import org.elasticsearch.action.support.TransportAction;
|
|||
import org.elasticsearch.cluster.ClusterService;
|
||||
import org.elasticsearch.cluster.ClusterState;
|
||||
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||
import org.elasticsearch.cluster.routing.*;
|
||||
import org.elasticsearch.cluster.routing.MutableShardRouting;
|
||||
import org.elasticsearch.cluster.routing.ShardIterator;
|
||||
import org.elasticsearch.cluster.routing.ShardRouting;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.engine.DocumentMissingException;
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper;
|
|||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.elasticsearch.action.get.MultiGetRequest;
|
||||
import org.elasticsearch.common.lucene.BytesRefs;
|
||||
|
||||
import java.util.Collection;
|
||||
|
@ -94,6 +95,15 @@ public final class Uid {
|
|||
return new Uid(uid.substring(0, delimiterIndex), uid.substring(delimiterIndex + 1));
|
||||
}
|
||||
|
||||
public static BytesRef[] createUids(List<MultiGetRequest.Item> items) {
|
||||
BytesRef[] uids = new BytesRef[items.size()];
|
||||
int idx = 0;
|
||||
for (MultiGetRequest.Item item : items) {
|
||||
uids[idx++] = createUidAsBytes(item);
|
||||
}
|
||||
return uids;
|
||||
}
|
||||
|
||||
/**
 * Builds a uid from string type and id by delegating to the BytesRef variant.
 */
public static BytesRef createUidAsBytes(String type, String id) {
    return createUidAsBytes(new BytesRef(type), new BytesRef(id));
}
|
||||
|
@ -102,6 +112,10 @@ public final class Uid {
|
|||
return createUidAsBytes(new BytesRef(type), id);
|
||||
}
|
||||
|
||||
/**
 * Builds the uid for a multi-get item from its type and id.
 */
public static BytesRef createUidAsBytes(MultiGetRequest.Item item) {
    return createUidAsBytes(item.type(), item.id());
}
|
||||
|
||||
public static BytesRef createUidAsBytes(BytesRef type, BytesRef id) {
|
||||
final BytesRef ref = new BytesRef(type.length + 1 + id.length);
|
||||
System.arraycopy(type.bytes, type.offset, ref.bytes, 0, type.length);
|
||||
|
|
|
@ -20,9 +20,19 @@
|
|||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.action.get.MultiGetRequest;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.lucene.uid.Versions;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.index.VersionType;
|
||||
import org.elasticsearch.search.fetch.source.FetchSourceContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* A more like this query that finds documents that are "like" the provided {@link #likeText(String)}
|
||||
|
@ -30,9 +40,69 @@ import java.io.IOException;
|
|||
*/
|
||||
public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements BoostableQueryBuilder<MoreLikeThisQueryBuilder> {
|
||||
|
||||
/**
 * A single get item. Pure delegate to multi get.
 * <p>
 * Adds XContent serialization so the item can be rendered back into the
 * query DSL; every field is emitted only when it differs from its unset /
 * default value.
 */
public static final class Item extends MultiGetRequest.Item implements ToXContent {
    public Item() {
        super();
    }

    public Item(String index, @Nullable String type, String id) {
        super(index, type, id);
    }

    /**
     * Renders this item as a JSON object, mirroring the Multi GET item
     * syntax (_index, _id, _type, fields, _routing, _source, _version,
     * _version_type).
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        if (this.index() != null) {
            builder.field("_index", this.index());
        }
        if (this.id() != null) {
            builder.field("_id", this.id());
        }
        if (this.type() != null) {
            builder.field("_type", this.type());
        }
        if (this.fields() != null) {
            builder.array("fields", this.fields());
        }
        if (this.routing() != null) {
            builder.field("_routing", this.routing());
        }
        if (this.fetchSourceContext() != null) {
            // "_source" is written in its most compact legal form:
            //   boolean  -> no includes/excludes at all
            //   array    -> includes only
            //   object   -> excludes present (optionally with includes)
            FetchSourceContext source = this.fetchSourceContext();
            String[] includes = source.includes();
            String[] excludes = source.excludes();
            if (includes.length == 0 && excludes.length == 0) {
                builder.field("_source", source.fetchSource());
            } else if (includes.length > 0 && excludes.length == 0) {
                builder.array("_source", source.includes());
            } else if (excludes.length > 0) {
                builder.startObject("_source");
                if (includes.length > 0) {
                    builder.array("includes", source.includes());
                }
                builder.array("excludes", source.excludes());
                builder.endObject();
            }
        }
        // Only serialize version info when it deviates from the defaults.
        if (this.version() != Versions.MATCH_ANY) {
            builder.field("_version", this.version());
        }
        if (this.versionType() != VersionType.INTERNAL) {
            builder.field("_version_type", this.versionType().toString().toLowerCase(Locale.ROOT));
        }
        return builder.endObject();
    }
}
|
||||
|
||||
private final String[] fields;
|
||||
|
||||
private String likeText;
|
||||
private List<String> ids = new ArrayList<>();
|
||||
private List<Item> docs = new ArrayList<>();
|
||||
private Boolean exclude = null;
|
||||
private float percentTermsToMatch = -1;
|
||||
private int minTermFreq = -1;
|
||||
private int maxQueryTerms = -1;
|
||||
|
@ -71,6 +141,26 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
|
|||
return this;
|
||||
}
|
||||
|
||||
public MoreLikeThisQueryBuilder ids(String... ids) {
|
||||
this.ids = Arrays.asList(ids);
|
||||
return this;
|
||||
}
|
||||
|
||||
public MoreLikeThisQueryBuilder docs(Item... docs) {
|
||||
this.docs = Arrays.asList(docs);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Adds a single document item to the list of documents to find "like"
 * documents for.
 * <p>
 * NOTE(review): assumes {@code docs} holds a growable list — verify the
 * interaction with {@code docs(Item...)}, which may install a fixed-size one.
 */
public MoreLikeThisQueryBuilder addItem(Item item) {
    this.docs.add(item);
    return this;
}
|
||||
|
||||
/**
 * Sets whether the provided documents should be excluded from the search
 * results. The value is only serialized when explicitly set here (the field
 * stays {@code null} otherwise).
 */
public MoreLikeThisQueryBuilder exclude(boolean exclude) {
    this.exclude = exclude;
    return this;
}
|
||||
|
||||
/**
|
||||
* The percentage of terms to match. Defaults to <tt>0.3</tt>.
|
||||
*/
|
||||
|
@ -192,9 +282,9 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
|
|||
}
|
||||
builder.endArray();
|
||||
}
|
||||
if (likeText == null) {
|
||||
throw new ElasticsearchIllegalArgumentException("moreLikeThis requires '"+
|
||||
MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' to be provided");
|
||||
if (likeText == null && this.docs.isEmpty() && this.ids.isEmpty()) {
|
||||
throw new ElasticsearchIllegalArgumentException("more_like_this requires either '"+
|
||||
MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' or 'docs/ids' to be provided");
|
||||
}
|
||||
builder.field(MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName(), likeText);
|
||||
if (percentTermsToMatch != -1) {
|
||||
|
@ -240,6 +330,15 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
|
|||
if (queryName != null) {
|
||||
builder.field("_name", queryName);
|
||||
}
|
||||
if (!ids.isEmpty()) {
|
||||
builder.array("ids", ids.toArray());
|
||||
}
|
||||
if (!docs.isEmpty()) {
|
||||
builder.array("docs", docs.toArray());
|
||||
}
|
||||
if (exclude != null) {
|
||||
builder.field("exclude", exclude);
|
||||
}
|
||||
builder.endObject();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,19 +22,26 @@ package org.elasticsearch.index.query;
|
|||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.TermsFilter;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.action.get.MultiGetRequest;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.analysis.Analysis;
|
||||
import org.elasticsearch.index.mapper.Uid;
|
||||
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
|
||||
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -42,8 +49,8 @@ import java.util.Set;
|
|||
public class MoreLikeThisQueryParser implements QueryParser {
|
||||
|
||||
public static final String NAME = "mlt";
|
||||
|
||||
|
||||
private MoreLikeThisFetchService fetchService = null;
|
||||
|
||||
public static class Fields {
|
||||
public static final ParseField LIKE_TEXT = new ParseField("like_text");
|
||||
public static final ParseField MIN_TERM_FREQ = new ParseField("min_term_freq");
|
||||
|
@ -56,10 +63,18 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
public static final ParseField PERCENT_TERMS_TO_MATCH = new ParseField("percent_terms_to_match");
|
||||
public static final ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field");
|
||||
public static final ParseField STOP_WORDS = new ParseField("stop_words");
|
||||
}
|
||||
public static final ParseField DOCUMENT_IDS = new ParseField("ids");
|
||||
public static final ParseField DOCUMENTS = new ParseField("docs");
|
||||
public static final ParseField EXCLUDE = new ParseField("exclude");
|
||||
}
|
||||
|
||||
@Inject
|
||||
public MoreLikeThisQueryParser() {
|
||||
|
||||
}
|
||||
|
||||
@Inject(optional = true)
|
||||
public void setFetchService(@Nullable MoreLikeThisFetchService fetchService) {
|
||||
this.fetchService = fetchService;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -77,9 +92,11 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
List<String> moreLikeFields = null;
|
||||
boolean failOnUnsupportedField = true;
|
||||
String queryName = null;
|
||||
boolean exclude = true;
|
||||
|
||||
XContentParser.Token token;
|
||||
String currentFieldName = null;
|
||||
List<MultiGetRequest.Item> items = new ArrayList<MultiGetRequest.Item>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
currentFieldName = parser.currentName();
|
||||
|
@ -114,10 +131,12 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
failOnUnsupportedField = parser.booleanValue();
|
||||
} else if ("_name".equals(currentFieldName)) {
|
||||
queryName = parser.text();
|
||||
} else if (Fields.EXCLUDE.match(currentFieldName, parseContext.parseFlags())) {
|
||||
exclude = parser.booleanValue();
|
||||
} else {
|
||||
throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
|
||||
}
|
||||
} else if (token == XContentParser.Token.START_ARRAY) {
|
||||
} else if (token == XContentParser.Token.START_ARRAY) {
|
||||
if (Fields.STOP_WORDS.match(currentFieldName, parseContext.parseFlags())) {
|
||||
Set<String> stopWords = Sets.newHashSet();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
|
@ -129,14 +148,18 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
moreLikeFields.add(parseContext.indexName(parser.text()));
|
||||
}
|
||||
} else if (Fields.DOCUMENT_IDS.match(currentFieldName, parseContext.parseFlags())) {
|
||||
MultiGetRequest.parseIds(parser, items);
|
||||
} else if (Fields.DOCUMENTS.match(currentFieldName, parseContext.parseFlags())) {
|
||||
MultiGetRequest.parseDocuments(parser, items);
|
||||
} else {
|
||||
throw new QueryParsingException(parseContext.index(), "[mlt] query does not support [" + currentFieldName + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mltQuery.getLikeText() == null) {
|
||||
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'like_text' to be specified");
|
||||
if ((mltQuery.getLikeText() == null && items.isEmpty()) || (mltQuery.getLikeText() != null && !items.isEmpty())) {
|
||||
throw new QueryParsingException(parseContext.index(), "more_like_this requires either 'like_text' or 'ids/docs' to be specified");
|
||||
}
|
||||
|
||||
if (analyzer == null) {
|
||||
|
@ -150,6 +173,75 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
throw new QueryParsingException(parseContext.index(), "more_like_this requires 'fields' to be non-empty");
|
||||
}
|
||||
|
||||
removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField);
|
||||
if (moreLikeFields.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
|
||||
|
||||
if (queryName != null) {
|
||||
parseContext.addNamedQuery(queryName, mltQuery);
|
||||
}
|
||||
|
||||
if (!items.isEmpty()) {
|
||||
// set default index, type and fields if not specified
|
||||
for (MultiGetRequest.Item item : items) {
|
||||
if (item.index() == null) {
|
||||
item.index(parseContext.index().name());
|
||||
}
|
||||
if (item.type() == null) {
|
||||
if (parseContext.queryTypes().size() > 1) {
|
||||
throw new QueryParsingException(parseContext.index(),
|
||||
"ambiguous type for item with id: " + item.id() + " and index: " + item.index());
|
||||
} else {
|
||||
item.type(parseContext.queryTypes().iterator().next());
|
||||
}
|
||||
}
|
||||
if (item.fields() == null && item.fetchSourceContext() == null) {
|
||||
item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
|
||||
} else {
|
||||
// TODO how about fields content fetched from _source?
|
||||
removeUnsupportedFields(item, analyzer, failOnUnsupportedField);
|
||||
}
|
||||
}
|
||||
// fetching the items with multi-get
|
||||
List<MoreLikeThisFetchService.LikeText> likeTexts = fetchService.fetch(items);
|
||||
// right now we are just building a boolean query
|
||||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
for (MoreLikeThisFetchService.LikeText likeText : likeTexts) {
|
||||
addMoreLikeThis(boolQuery, mltQuery, likeText.field, likeText.text);
|
||||
}
|
||||
// exclude the items from the search
|
||||
if (exclude) {
|
||||
TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, Uid.createUids(items));
|
||||
ConstantScoreQuery query = new ConstantScoreQuery(filter);
|
||||
boolQuery.add(query, BooleanClause.Occur.MUST_NOT);
|
||||
}
|
||||
return boolQuery;
|
||||
}
|
||||
|
||||
return mltQuery;
|
||||
}
|
||||
|
||||
private void addMoreLikeThis(BooleanQuery boolQuery, MoreLikeThisQuery mltQuery, String fieldName, String likeText) {
|
||||
MoreLikeThisQuery mlt = new MoreLikeThisQuery();
|
||||
mlt.setMoreLikeFields(new String[] {fieldName});
|
||||
mlt.setLikeText(likeText);
|
||||
mlt.setAnalyzer(mltQuery.getAnalyzer());
|
||||
mlt.setPercentTermsToMatch(mltQuery.getPercentTermsToMatch());
|
||||
mlt.setBoostTerms(mltQuery.isBoostTerms());
|
||||
mlt.setBoostTermsFactor(mltQuery.getBoostTermsFactor());
|
||||
mlt.setMinDocFreq(mltQuery.getMinDocFreq());
|
||||
mlt.setMaxDocFreq(mltQuery.getMaxDocFreq());
|
||||
mlt.setMinWordLen(mltQuery.getMinWordLen());
|
||||
mlt.setMaxWordLen(mltQuery.getMaxWordLen());
|
||||
mlt.setMinTermFrequency(mltQuery.getMinTermFrequency());
|
||||
mlt.setMaxQueryTerms(mltQuery.getMaxQueryTerms());
|
||||
mlt.setStopWords(mltQuery.getStopWords());
|
||||
boolQuery.add(mlt, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
|
||||
private List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
|
||||
for (Iterator<String> it = moreLikeFields.iterator(); it.hasNext(); ) {
|
||||
final String fieldName = it.next();
|
||||
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
|
||||
|
@ -160,13 +252,11 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (moreLikeFields.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
|
||||
if (queryName != null) {
|
||||
parseContext.addNamedQuery(queryName, mltQuery);
|
||||
}
|
||||
return mltQuery;
|
||||
return moreLikeFields;
|
||||
}
|
||||
|
||||
private void removeUnsupportedFields(MultiGetRequest.Item item, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
|
||||
item.fields((String[]) removeUnsupportedFields(Arrays.asList(item.fields()), analyzer, failOnUnsupportedField).toArray());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.search.morelikethis;
|
||||
|
||||
import org.elasticsearch.action.get.GetResponse;
|
||||
import org.elasticsearch.action.get.MultiGetItemResponse;
|
||||
import org.elasticsearch.action.get.MultiGetRequest;
|
||||
import org.elasticsearch.action.get.MultiGetResponse;
|
||||
import org.elasticsearch.client.Client;
|
||||
import org.elasticsearch.common.component.AbstractComponent;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.get.GetField;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class MoreLikeThisFetchService extends AbstractComponent {
|
||||
|
||||
public static final class LikeText {
|
||||
public final String field;
|
||||
public final String text;
|
||||
|
||||
public LikeText(String field, String text) {
|
||||
this.field = field;
|
||||
this.text = text;
|
||||
}
|
||||
}
|
||||
|
||||
private final Client client;
|
||||
|
||||
@Inject
|
||||
public MoreLikeThisFetchService(Client client, Settings settings) {
|
||||
super(settings);
|
||||
this.client = client;
|
||||
}
|
||||
|
||||
public List<LikeText> fetch(List<MultiGetRequest.Item> items) throws IOException {
|
||||
MultiGetRequest request = new MultiGetRequest();
|
||||
for (MultiGetRequest.Item item : items) {
|
||||
request.add(item);
|
||||
}
|
||||
MultiGetResponse responses = client.multiGet(request).actionGet();
|
||||
List<LikeText> likeTexts = new ArrayList<>();
|
||||
for (MultiGetItemResponse response : responses) {
|
||||
if (response.isFailed()) {
|
||||
continue;
|
||||
}
|
||||
GetResponse getResponse = response.getResponse();
|
||||
if (!getResponse.isExists()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (GetField getField : getResponse.getFields().values()) {
|
||||
for (Object value : getField.getValues()) {
|
||||
likeTexts.add(new LikeText(getField.getName(), value.toString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
return likeTexts;
|
||||
}
|
||||
}
|
|
@ -24,6 +24,7 @@ import org.elasticsearch.common.inject.AbstractModule;
|
|||
import org.elasticsearch.common.inject.Module;
|
||||
import org.elasticsearch.common.inject.SpawnModules;
|
||||
import org.elasticsearch.index.query.functionscore.FunctionScoreModule;
|
||||
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
|
||||
import org.elasticsearch.search.action.SearchServiceTransportAction;
|
||||
import org.elasticsearch.search.aggregations.AggregationModule;
|
||||
import org.elasticsearch.search.controller.SearchPhaseController;
|
||||
|
@ -70,5 +71,6 @@ public class SearchModule extends AbstractModule implements SpawnModules {
|
|||
bind(HighlightPhase.class).asEagerSingleton();
|
||||
|
||||
bind(SearchServiceTransportAction.class).asEagerSingleton();
|
||||
bind(MoreLikeThisFetchService.class).asEagerSingleton();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.elasticsearch.common.io.stream.Streamable;
|
|||
import org.elasticsearch.rest.RestRequest;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
*/
|
||||
|
@ -162,4 +163,26 @@ public class FetchSourceContext implements Streamable {
|
|||
out.writeStringArray(includes);
|
||||
out.writeStringArray(excludes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
FetchSourceContext that = (FetchSourceContext) o;
|
||||
|
||||
if (fetchSource != that.fetchSource) return false;
|
||||
if (!Arrays.equals(excludes, that.excludes)) return false;
|
||||
if (!Arrays.equals(includes, that.includes)) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = (fetchSource ? 1 : 0);
|
||||
result = 31 * result + (includes != null ? Arrays.hashCode(includes) : 0);
|
||||
result = 31 * result + (excludes != null ? Arrays.hashCode(excludes) : 0);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,168 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
import org.elasticsearch.action.get.MultiGetRequest;
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.common.xcontent.XContentHelper;
|
||||
import org.elasticsearch.index.VersionType;
|
||||
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
|
||||
import org.elasticsearch.search.fetch.source.FetchSourceContext;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
|
||||
public class ItemSerializationTests extends ElasticsearchTestCase {
|
||||
|
||||
private String[] generateRandomStringArray(int arraySize, int stringSize) {
|
||||
String[] array = randomBoolean() ? new String[randomInt(arraySize)] : null; // allow empty arrays
|
||||
if (array != null) {
|
||||
for (int i = 0; i < array.length; i++) {
|
||||
array[i] = randomAsciiOfLength(stringSize);
|
||||
}
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
private Item generateRandomItem(int arraySize, int stringSize) {
|
||||
String index = randomAsciiOfLength(stringSize);
|
||||
String type = randomAsciiOfLength(stringSize);
|
||||
String id = String.valueOf(Math.abs(randomInt()));
|
||||
String routing = randomBoolean() ? randomAsciiOfLength(stringSize) : null;
|
||||
String[] fields = generateRandomStringArray(arraySize, stringSize);
|
||||
|
||||
long version = Math.abs(randomLong());
|
||||
VersionType versionType = RandomPicks.randomFrom(new Random(), VersionType.values());
|
||||
|
||||
FetchSourceContext fetchSourceContext;
|
||||
switch (randomIntBetween(0, 3)) {
|
||||
case 0 :
|
||||
fetchSourceContext = new FetchSourceContext(randomBoolean());
|
||||
break;
|
||||
case 1 :
|
||||
fetchSourceContext = new FetchSourceContext(generateRandomStringArray(arraySize, stringSize));
|
||||
break;
|
||||
case 2 :
|
||||
fetchSourceContext = new FetchSourceContext(generateRandomStringArray(arraySize, stringSize),
|
||||
generateRandomStringArray(arraySize, stringSize));
|
||||
break;
|
||||
default:
|
||||
fetchSourceContext = null;
|
||||
break;
|
||||
}
|
||||
return (Item) new Item(index, type, id).routing(routing).fields(fields).version(version).versionType(versionType)
|
||||
.fetchSourceContext(fetchSourceContext);
|
||||
}
|
||||
|
||||
private String ItemToJSON(Item item) throws IOException {
|
||||
XContentBuilder builder = XContentFactory.jsonBuilder();
|
||||
builder.startObject();
|
||||
builder.startArray("docs");
|
||||
item.toXContent(builder, ToXContent.EMPTY_PARAMS);
|
||||
builder.endArray();
|
||||
builder.endObject();
|
||||
return XContentHelper.convertToJson(builder.bytes(), false);
|
||||
}
|
||||
|
||||
private MultiGetRequest.Item JSONtoItem(String json) throws Exception {
|
||||
MultiGetRequest request = new MultiGetRequest().add(null, null, null, null, new BytesArray(json), true);
|
||||
return request.getItems().get(0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testItemSerialization() throws Exception {
|
||||
int numOfTrials = 100;
|
||||
int maxArraySize = 7;
|
||||
int maxStringSize = 8;
|
||||
for (int i = 0; i < numOfTrials; i++) {
|
||||
Item item1 = generateRandomItem(maxArraySize, maxStringSize);
|
||||
String json = ItemToJSON(item1);
|
||||
MultiGetRequest.Item item2 = JSONtoItem(json);
|
||||
assertEquals(item1, item2);
|
||||
}
|
||||
}
|
||||
|
||||
private List<MultiGetRequest.Item> testItemsFromJSON(String json) throws Exception {
|
||||
MultiGetRequest request = new MultiGetRequest();
|
||||
request.add(null, null, null, null, new BytesArray(json), true);
|
||||
List<MultiGetRequest.Item> items = request.getItems();
|
||||
|
||||
assertEquals(items.size(), 3);
|
||||
for (MultiGetRequest.Item item : items) {
|
||||
assertThat(item.index(), is("test"));
|
||||
assertThat(item.type(), is("type"));
|
||||
FetchSourceContext fetchSource = item.fetchSourceContext();
|
||||
switch (item.id()) {
|
||||
case "1" :
|
||||
assertThat(fetchSource.fetchSource(), is(false));
|
||||
break;
|
||||
case "2" :
|
||||
assertThat(fetchSource.fetchSource(), is(true));
|
||||
assertThat(fetchSource.includes(), is(new String[]{"field3", "field4"}));
|
||||
break;
|
||||
case "3" :
|
||||
assertThat(fetchSource.fetchSource(), is(true));
|
||||
assertThat(fetchSource.includes(), is(new String[]{"user"}));
|
||||
assertThat(fetchSource.excludes(), is(new String[]{"user.location"}));
|
||||
break;
|
||||
default:
|
||||
fail("item with id: " + item.id() + " is not 1, 2 or 3");
|
||||
break;
|
||||
}
|
||||
}
|
||||
return items;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleItemSerializationFromFile() throws Exception {
|
||||
// test items from JSON
|
||||
List<MultiGetRequest.Item> itemsFromJSON = testItemsFromJSON(
|
||||
copyToStringFromClasspath("/org/elasticsearch/index/query/items.json"));
|
||||
|
||||
// create builder from items
|
||||
XContentBuilder builder = XContentFactory.jsonBuilder();
|
||||
builder.startObject();
|
||||
builder.startArray("docs");
|
||||
for (MultiGetRequest.Item item : itemsFromJSON) {
|
||||
MoreLikeThisQueryBuilder.Item itemForBuilder = (MoreLikeThisQueryBuilder.Item) new MoreLikeThisQueryBuilder.Item(
|
||||
item.index(), item.type(), item.id())
|
||||
.fetchSourceContext(item.fetchSourceContext())
|
||||
.fields(item.fields());
|
||||
itemForBuilder.toXContent(builder, ToXContent.EMPTY_PARAMS);
|
||||
}
|
||||
builder.endArray();
|
||||
builder.endObject();
|
||||
|
||||
// verify generated JSON lead to the same items
|
||||
String json = XContentHelper.convertToJson(builder.bytes(), false);
|
||||
testItemsFromJSON(json);
|
||||
}
|
||||
|
||||
}
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.action.get.MultiGetRequest;
|
||||
import org.elasticsearch.cache.recycler.CacheRecyclerModule;
|
||||
import org.elasticsearch.cluster.ClusterService;
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
|
@ -63,6 +64,7 @@ import org.elasticsearch.index.search.NumericRangeFieldDataFilter;
|
|||
import org.elasticsearch.index.search.geo.GeoDistanceFilter;
|
||||
import org.elasticsearch.index.search.geo.GeoPolygonFilter;
|
||||
import org.elasticsearch.index.search.geo.InMemoryGeoBoundingBoxFilter;
|
||||
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
|
||||
import org.elasticsearch.index.settings.IndexSettingsModule;
|
||||
import org.elasticsearch.index.similarity.SimilarityModule;
|
||||
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
|
||||
|
@ -80,6 +82,7 @@ import org.junit.Test;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -137,6 +140,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase {
|
|||
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/query/mapping.json");
|
||||
injector.getInstance(MapperService.class).merge("person", new CompressedString(mapping), true);
|
||||
injector.getInstance(MapperService.class).documentMapper("person").parse(new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/index/query/data.json")));
|
||||
|
||||
queryParser = injector.getInstance(IndexQueryParserService.class);
|
||||
}
|
||||
|
||||
|
@ -1671,6 +1675,58 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase {
|
|||
assertThat(mltQuery.getMaxQueryTerms(), equalTo(12));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMoreLikeThisIds() throws Exception {
|
||||
MoreLikeThisQueryParser parser = (MoreLikeThisQueryParser) queryParser.queryParser("more_like_this");
|
||||
parser.setFetchService(new MockMoreLikeThisFetchService());
|
||||
|
||||
List<MoreLikeThisFetchService.LikeText> likeTexts = new ArrayList<>();
|
||||
String index = "test";
|
||||
String type = "person";
|
||||
for (int i = 1; i < 5; i++) {
|
||||
for (String field : new String[]{"name.first", "name.last"}) {
|
||||
MoreLikeThisFetchService.LikeText likeText = new MoreLikeThisFetchService.LikeText(
|
||||
field, index + " " + type + " " + i + " " + field);
|
||||
likeTexts.add(likeText);
|
||||
}
|
||||
}
|
||||
|
||||
IndexQueryParserService queryParser = queryParser();
|
||||
String query = copyToStringFromClasspath("/org/elasticsearch/index/query/mlt-ids.json");
|
||||
Query parsedQuery = queryParser.parse(query).query();
|
||||
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
|
||||
BooleanQuery booleanQuery = (BooleanQuery) parsedQuery;
|
||||
assertThat(booleanQuery.getClauses().length, is(likeTexts.size()));
|
||||
|
||||
for (int i=0; i<likeTexts.size(); i++) {
|
||||
BooleanClause booleanClause = booleanQuery.getClauses()[i];
|
||||
assertThat(booleanClause.getOccur(), is(BooleanClause.Occur.SHOULD));
|
||||
assertThat(booleanClause.getQuery(), instanceOf(MoreLikeThisQuery.class));
|
||||
MoreLikeThisQuery mltQuery = (MoreLikeThisQuery) booleanClause.getQuery();
|
||||
assertThat(mltQuery.getLikeText(), is(likeTexts.get(i).text));
|
||||
assertThat(mltQuery.getMoreLikeFields()[0], equalTo(likeTexts.get(i).field));
|
||||
}
|
||||
}
|
||||
|
||||
private static class MockMoreLikeThisFetchService extends MoreLikeThisFetchService {
|
||||
|
||||
public MockMoreLikeThisFetchService() {
|
||||
super(null, ImmutableSettings.Builder.EMPTY_SETTINGS);
|
||||
}
|
||||
|
||||
public List<LikeText> fetch(List<MultiGetRequest.Item> items) throws IOException {
|
||||
List<LikeText> likeTexts = new ArrayList<>();
|
||||
for (MultiGetRequest.Item item: items) {
|
||||
for (String field : item.fields()) {
|
||||
LikeText likeText = new LikeText(
|
||||
field, item.index() + " " + item.type() + " " + item.id() + " " + field);
|
||||
likeTexts.add(likeText);
|
||||
}
|
||||
}
|
||||
return likeTexts;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFuzzyLikeThisBuilder() throws Exception {
|
||||
IndexQueryParserService queryParser = queryParser();
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
{
|
||||
"docs" : [
|
||||
{
|
||||
"_index" : "test",
|
||||
"_type" : "type",
|
||||
"_id" : "1",
|
||||
"_source" : false
|
||||
},
|
||||
{
|
||||
"_index" : "test",
|
||||
"_type" : "type",
|
||||
"_id" : "2",
|
||||
"_source" : ["field3", "field4"]
|
||||
},
|
||||
{
|
||||
"_index" : "test",
|
||||
"_type" : "type",
|
||||
"_id" : "3",
|
||||
"_source" : {
|
||||
"include": ["user"],
|
||||
"exclude": ["user.location"]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"more_like_this" : {
|
||||
"fields" : ["name.first", "name.last"],
|
||||
"docs" : [
|
||||
{
|
||||
"_index" : "test",
|
||||
"_type" : "person",
|
||||
"_id" : "1"
|
||||
},
|
||||
{
|
||||
"_index" : "test",
|
||||
"_type" : "person",
|
||||
"_id" : "2"
|
||||
}
|
||||
],
|
||||
"ids" : ["3", "4"],
|
||||
"exclude" : false,
|
||||
"min_term_freq" : 1,
|
||||
"max_query_terms" : 12
|
||||
}
|
||||
}
|
|
@ -20,12 +20,18 @@
|
|||
package org.elasticsearch.mlt;
|
||||
|
||||
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
|
||||
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
|
||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||
import org.elasticsearch.action.mlt.MoreLikeThisRequest;
|
||||
import org.elasticsearch.action.search.SearchPhaseExecutionException;
|
||||
import org.elasticsearch.action.search.SearchResponse;
|
||||
import org.elasticsearch.action.search.SearchType;
|
||||
import org.elasticsearch.client.Client;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.elasticsearch.search.SearchHit;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
||||
import org.junit.Test;
|
||||
|
@ -341,4 +347,118 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
|
|||
assertEquals(mltResponse.getHits().hits().length, 8);
|
||||
}
|
||||
|
||||
public void testSimpleMoreLikeThisIds() throws Exception {
|
||||
logger.info("Creating index test");
|
||||
assertAcked(prepareCreate("test").addMapping("type1",
|
||||
jsonBuilder().startObject().startObject("type1").startObject("properties")
|
||||
.startObject("text").field("type", "string").endObject()
|
||||
.endObject().endObject().endObject()));
|
||||
|
||||
logger.info("Running Cluster Health");
|
||||
assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));
|
||||
|
||||
logger.info("Indexing...");
|
||||
List<IndexRequestBuilder> builders = new ArrayList<>();
|
||||
builders.add(client().prepareIndex("test", "type1").setSource("text", "lucene").setId("1"));
|
||||
builders.add(client().prepareIndex("test", "type1").setSource("text", "lucene release").setId("2"));
|
||||
builders.add(client().prepareIndex("test", "type1").setSource("text", "apache lucene").setId("3"));
|
||||
indexRandom(true, builders);
|
||||
|
||||
logger.info("Running MoreLikeThis");
|
||||
MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").ids("1").exclude(false).minTermFreq(1).minDocFreq(1);
|
||||
SearchResponse mltResponse = client().prepareSearch().setTypes("type1").setQuery(queryBuilder).execute().actionGet();
|
||||
assertHitCount(mltResponse, 3l);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompareMoreLikeThisDSLWithAPI() throws Exception {
|
||||
logger.info("Creating index test");
|
||||
assertAcked(prepareCreate("test").addMapping("type1",
|
||||
jsonBuilder().startObject().startObject("type1").startObject("properties")
|
||||
.startObject("text").field("type", "string").endObject()
|
||||
.endObject().endObject().endObject()));
|
||||
|
||||
logger.info("Running Cluster Health");
|
||||
assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));
|
||||
|
||||
logger.info("Indexing...");
|
||||
String[] texts = new String[] {
|
||||
"Apache Lucene",
|
||||
"free and open source",
|
||||
"information retrieval",
|
||||
"software library",
|
||||
"programmed in Java",
|
||||
"Doug Cutting",
|
||||
"Apache Software Foundation",
|
||||
"Apache Software License",
|
||||
"Lucene programming languages",
|
||||
"Delphi, Perl, C#, C++, Python, Ruby, and PHP"
|
||||
};
|
||||
List<IndexRequestBuilder> builders = new ArrayList<>(10);
|
||||
for (int i = 0; i < texts.length; i++) {
|
||||
builders.add(client().prepareIndex("test", "type1").setSource("text", texts[i]).setId(String.valueOf(i)));
|
||||
}
|
||||
indexRandom(true, builders);
|
||||
|
||||
logger.info("Running MoreLikeThis DSL with IDs");
|
||||
Client client = client();
|
||||
MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").ids("0").minTermFreq(1).minDocFreq(1);
|
||||
SearchResponse mltResponseDSL = client.prepareSearch()
|
||||
.setSearchType(SearchType.QUERY_THEN_FETCH)
|
||||
.setTypes("type1")
|
||||
.setQuery(queryBuilder)
|
||||
.execute().actionGet();
|
||||
assertSearchResponse(mltResponseDSL);
|
||||
|
||||
logger.info("Running MoreLikeThis API");
|
||||
MoreLikeThisRequest mltRequest = moreLikeThisRequest("test").type("type1").id("0").minTermFreq(1).minDocFreq(1);
|
||||
SearchResponse mltResponseAPI = client.moreLikeThis(mltRequest).actionGet();
|
||||
assertSearchResponse(mltResponseAPI);
|
||||
|
||||
logger.info("Ensure the documents and scores returned are the same.");
|
||||
SearchHit[] hitsDSL = mltResponseDSL.getHits().hits();
|
||||
SearchHit[] hitsAPI = mltResponseAPI.getHits().hits();
|
||||
assertThat("Not the same number of results.", hitsAPI.length, equalTo(hitsDSL.length));
|
||||
for (int i = 0; i < hitsDSL.length; i++) {
|
||||
assertThat("Expected id: " + hitsDSL[i].getId() + " at position " + i + " but wasn't.",
|
||||
hitsAPI[i].getId(), equalTo(hitsDSL[i].getId()));
|
||||
assertThat("Expected score: " + hitsDSL[i].getScore() + " at position " + i + " but wasn't.",
|
||||
hitsAPI[i].getScore(), equalTo(hitsDSL[i].getScore()));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleMoreLikeThisIdsMultipleTypes() throws Exception {
|
||||
logger.info("Creating index test");
|
||||
int numOfTypes = randomIntBetween(2, 10);
|
||||
CreateIndexRequestBuilder createRequestBuilder = prepareCreate("test");
|
||||
for (int i = 0; i < numOfTypes; i++) {
|
||||
createRequestBuilder.addMapping("type" + i, jsonBuilder().startObject().startObject("type" + i).startObject("properties")
|
||||
.startObject("text").field("type", "string").endObject()
|
||||
.endObject().endObject().endObject());
|
||||
}
|
||||
assertAcked(createRequestBuilder);
|
||||
|
||||
logger.info("Running Cluster Health");
|
||||
assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));
|
||||
|
||||
logger.info("Indexing...");
|
||||
List<IndexRequestBuilder> builders = new ArrayList<>(numOfTypes);
|
||||
for (int i = 0; i < numOfTypes; i++) {
|
||||
builders.add(client().prepareIndex("test", "type" + i).setSource("text", "lucene" + " " + i).setId(String.valueOf(i)));
|
||||
}
|
||||
indexRandom(true, builders);
|
||||
|
||||
logger.info("Running MoreLikeThis");
|
||||
MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").exclude(false).minTermFreq(1).minDocFreq(1)
|
||||
.addItem(new MoreLikeThisQueryBuilder.Item("test", "type0", "0"));
|
||||
|
||||
String[] types = new String[numOfTypes];
|
||||
for (int i = 0; i < numOfTypes; i++) {
|
||||
types[i] = "type"+i;
|
||||
}
|
||||
SearchResponse mltResponse = client().prepareSearch().setTypes(types).setQuery(queryBuilder).execute().actionGet();
|
||||
assertHitCount(mltResponse, numOfTypes);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue