MLT: builder takes a new Item object like its parser
Previously the parser could take any Term Vectors request, but this was not the case for the builder, which would still use MultiGetRequest.Item. This introduces a new Item class which is used by both the builder and the parser. Beyond that, the rest is mostly cleanups, such as: 1) deprecating the ignoreLike methods in favor of unlike; 2) deprecating and renaming MoreLikeThisBuilder#addItem to addLikeItem; 3) ordering the methods of MoreLikeThisBuilder more logically. This change is needed for the upcoming query refactoring of MLT. Closes #13372
This commit is contained in:
parent
e23d116bc5
commit
a45ee273e3
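Taken together, the reworked builder API reads roughly as below. This is a minimal sketch, not part of the commit: the index, type, id and field names are invented, and only methods visible in this diff (like, addLikeItem, unlike, minTermFreq, boostTerms) are used.

import org.elasticsearch.index.query.MoreLikeThisQueryBuilder;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;

public class MltBuilderSketch {
    public static MoreLikeThisQueryBuilder buildQuery() {
        // Texts and items may now be mixed freely; unlike() supersedes the
        // deprecated ignoreLike(), and addLikeItem() supersedes addItem().
        return new MoreLikeThisQueryBuilder("title", "body")      // field names are hypothetical
                .like("some sample text")                         // free-form text input
                .addLikeItem(new Item("my-index", "my-type", "1")) // a document in the index
                .unlike("text the results should not resemble")
                .minTermFreq(1)
                .boostTerms(1.0f);
    }
}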
@@ -640,7 +640,7 @@ public class TermVectorsRequest extends SingleShardRequest<TermVectorsRequest> i
         }
     }

-    private static Map<String, String> readPerFieldAnalyzer(Map<String, Object> map) {
+    public static Map<String, String> readPerFieldAnalyzer(Map<String, Object> map) {
         Map<String, String> mapStrStr = new HashMap<>();
         for (Map.Entry<String, Object> e : map.entrySet()) {
             if (e.getValue() instanceof String) {
@@ -253,12 +253,12 @@ public class MoreLikeThisQuery extends Query {
         setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
     }

-    public void setUnlikeText(Fields... ignoreFields) {
-        this.unlikeFields = ignoreFields;
+    public void setUnlikeText(Fields... unlikeFields) {
+        this.unlikeFields = unlikeFields;
     }

-    public void setIgnoreText(List<String> ignoreText) {
-        this.unlikeText = ignoreText.toArray(Strings.EMPTY_ARRAY);
+    public void setUnlikeText(List<String> unlikeText) {
+        this.unlikeText = unlikeText.toArray(Strings.EMPTY_ARRAY);
     }

     public String[] getMoreLikeFields() {
@@ -19,131 +19,357 @@

 package org.elasticsearch.index.query;

-import org.elasticsearch.action.get.MultiGetRequest;
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.ExceptionsHelper;
+import org.elasticsearch.action.termvectors.TermVectorsRequest;
 import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.ParseFieldMatcher;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.lucene.uid.Versions;
 import org.elasticsearch.common.xcontent.*;
 import org.elasticsearch.index.VersionType;
-import org.elasticsearch.search.fetch.source.FetchSourceContext;

 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Locale;
+import java.util.*;

+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+
 /**
- * A more like this query that finds documents that are "like" the provided {@link #likeText(String)}
- * which is checked against the fields the query is constructed with.
+ * A more like this query that finds documents that are "like" the provided set of document(s).
+ *
+ * The documents are provided as a set of strings and/or a list of {@link Item}.
  */
 public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQueryBuilder<MoreLikeThisQueryBuilder> {

     /**
-     * A single get item. Pure delegate to multi get.
+     * A single item to be used for a {@link MoreLikeThisQueryBuilder}.
      */
-    public static final class Item extends MultiGetRequest.Item implements ToXContent {
+    public static final class Item implements ToXContent {
+        public static final Item[] EMPTY_ARRAY = new Item[0];
+
+        public interface Field {
+            ParseField INDEX = new ParseField("_index");
+            ParseField TYPE = new ParseField("_type");
+            ParseField ID = new ParseField("_id");
+            ParseField DOC = new ParseField("doc");
+            ParseField FIELDS = new ParseField("fields");
+            ParseField PER_FIELD_ANALYZER = new ParseField("per_field_analyzer");
+            ParseField ROUTING = new ParseField("_routing");
+            ParseField VERSION = new ParseField("_version");
+            ParseField VERSION_TYPE = new ParseField("_version_type");
+        }
+
+        private String index;
+        private String type;
+        private String id;
         private BytesReference doc;
-        private String likeText;
+        private String[] fields;
+        private Map<String, String> perFieldAnalyzer;
+        private String routing;
+        private long version = Versions.MATCH_ANY;
+        private VersionType versionType = VersionType.INTERNAL;

         public Item() {
-            super();
-
         }

         /**
          * Constructor for a given item / document request
          *
          * @param index the index where the document is located
          * @param type the type of the document
          * @param id and its id
          */
         public Item(String index, @Nullable String type, String id) {
-            super(index, type, id);
+            this.index = index;
+            this.type = type;
+            this.id = id;
         }

-        public Item(String likeText) {
-            this.likeText = likeText;
+        /**
+         * Constructor for an artificial document request, that is not present in the index.
+         *
+         * @param index the index to be used for parsing the doc
+         * @param type the type to be used for parsing the doc
+         * @param doc the document specification
+         */
+        public Item(String index, String type, XContentBuilder doc) {
+            this.index = index;
+            this.type = type;
+            this.doc(doc);
         }

+        public String index() {
+            return index;
+        }
+
+        public Item index(String index) {
+            this.index = index;
+            return this;
+        }
+
+        public String type() {
+            return type;
+        }
+
+        public Item type(String type) {
+            this.type = type;
+            return this;
+        }
+
+        public String id() {
+            return id;
+        }
+
+        public Item id(String id) {
+            this.id = id;
+            return this;
+        }
+
         public BytesReference doc() {
             return doc;
         }

-        public Item doc(XContentBuilder doc) {
-            this.doc = doc.bytes();
+        /**
+         * Sets to a given artificial document, that is a document that is not present in the index.
+         */
+        public Item doc(BytesReference doc) {
+            this.doc = doc;
             return this;
         }

+        /**
+         * Sets to a given artificial document, that is a document that is not present in the index.
+         */
+        public Item doc(XContentBuilder doc) {
+            return this.doc(doc.bytes());
+        }
+
+        public String[] fields() {
+            return fields;
+        }
+
+        public Item fields(String... fields) {
+            this.fields = fields;
+            return this;
+        }
+
+        public Map<String, String> perFieldAnalyzer() {
+            return perFieldAnalyzer;
+        }
+
+        /**
+         * Sets the analyzer(s) to use at any given field.
+         */
+        public Item perFieldAnalyzer(Map<String, String> perFieldAnalyzer) {
+            this.perFieldAnalyzer = perFieldAnalyzer;
+            return this;
+        }
+
+        public String routing() {
+            return routing;
+        }
+
+        public Item routing(String routing) {
+            this.routing = routing;
+            return this;
+        }
+
+        public long version() {
+            return version;
+        }
+
+        public Item version(long version) {
+            this.version = version;
+            return this;
+        }
+
+        public VersionType versionType() {
+            return versionType;
+        }
+
+        public Item versionType(VersionType versionType) {
+            this.versionType = versionType;
+            return this;
+        }
+
+        /**
+         * Convert this to a {@link TermVectorsRequest} for fetching the terms of the document.
+         */
+        public TermVectorsRequest toTermVectorsRequest() {
+            TermVectorsRequest termVectorsRequest = new TermVectorsRequest(index, type, id)
+                    .selectedFields(fields)
+                    .routing(routing)
+                    .version(version)
+                    .versionType(versionType)
+                    .perFieldAnalyzer(perFieldAnalyzer)
+                    .positions(false)  // ensures these following parameters are never set
+                    .offsets(false)
+                    .payloads(false)
+                    .fieldStatistics(false)
+                    .termStatistics(false)
+                    .dfs(false);
+            // for artificial docs to make sure that the id has changed in the item too
+            if (doc != null) {
+                termVectorsRequest.doc(doc, true);
+                this.id(termVectorsRequest.id());
+            }
+            return termVectorsRequest;
+        }
+
+        /**
+         * Parses and returns the given item.
+         */
+        public static Item parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, Item item) throws IOException {
+            XContentParser.Token token;
+            String currentFieldName = null;
+            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+                if (token == XContentParser.Token.FIELD_NAME) {
+                    currentFieldName = parser.currentName();
+                } else if (currentFieldName != null) {
+                    if (parseFieldMatcher.match(currentFieldName, Field.INDEX)) {
+                        item.index = parser.text();
+                    } else if (parseFieldMatcher.match(currentFieldName, Field.TYPE)) {
+                        item.type = parser.text();
+                    } else if (parseFieldMatcher.match(currentFieldName, Field.ID)) {
+                        item.id = parser.text();
+                    } else if (parseFieldMatcher.match(currentFieldName, Field.DOC)) {
+                        item.doc(jsonBuilder().copyCurrentStructure(parser));
+                    } else if (parseFieldMatcher.match(currentFieldName, Field.FIELDS)) {
+                        if (token == XContentParser.Token.START_ARRAY) {
+                            List<String> fields = new ArrayList<>();
+                            while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
+                                fields.add(parser.text());
+                            }
+                            item.fields(fields.toArray(new String[fields.size()]));
+                        } else {
+                            throw new ElasticsearchParseException(
+                                    "failed to parse More Like This item. field [fields] must be an array");
+                        }
+                    } else if (parseFieldMatcher.match(currentFieldName, Field.PER_FIELD_ANALYZER)) {
+                        item.perFieldAnalyzer(TermVectorsRequest.readPerFieldAnalyzer(parser.map()));
+                    } else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) {
+                        item.routing = parser.text();
+                    } else if ("_version".equals(currentFieldName) || "version".equals(currentFieldName)) {
+                        item.version = parser.longValue();
+                    } else if ("_version_type".equals(currentFieldName) || "_versionType".equals(currentFieldName)
+                            || "version_type".equals(currentFieldName) || "versionType".equals(currentFieldName)) {
+                        item.versionType = VersionType.fromString(parser.text());
+                    } else {
+                        throw new ElasticsearchParseException(
+                                "failed to parse More Like This item. unknown field [{}]", currentFieldName);
+                    }
+                }
+            }
+            if (item.id != null && item.doc != null) {
+                throw new ElasticsearchParseException(
+                        "failed to parse More Like This item. either [id] or [doc] can be specified, but not both!");
+            }
+            return item;
+        }
+
         @Override
         public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
-            if (this.likeText != null) {
-                return builder.value(this.likeText);
-            }
             builder.startObject();
-            if (this.index() != null) {
-                builder.field("_index", this.index());
+            if (this.index != null) {
+                builder.field(Field.INDEX.getPreferredName(), this.index);
             }
-            if (this.type() != null) {
-                builder.field("_type", this.type());
+            if (this.type != null) {
+                builder.field(Field.TYPE.getPreferredName(), this.type);
             }
-            if (this.id() != null) {
-                builder.field("_id", this.id());
+            if (this.id != null && this.doc == null) {
+                builder.field(Field.ID.getPreferredName(), this.id);
             }
-            if (this.doc() != null) {
-                XContentType contentType = XContentFactory.xContentType(doc);
+            if (this.doc != null) {
+                XContentType contentType = XContentFactory.xContentType(this.doc);
                 if (contentType == builder.contentType()) {
-                    builder.rawField("doc", doc);
+                    builder.rawField(Field.DOC.getPreferredName(), this.doc);
                 } else {
-                    XContentParser parser = XContentFactory.xContent(contentType).createParser(doc);
+                    XContentParser parser = XContentFactory.xContent(contentType).createParser(this.doc);
                     parser.nextToken();
-                    builder.field("doc");
+                    builder.field(Field.DOC.getPreferredName());
                     builder.copyCurrentStructure(parser);
                 }
             }
-            if (this.fields() != null) {
-                builder.array("fields", this.fields());
+            if (this.fields != null) {
+                builder.array(Field.FIELDS.getPreferredName(), this.fields);
             }
-            if (this.routing() != null) {
-                builder.field("_routing", this.routing());
+            if (this.perFieldAnalyzer != null) {
+                builder.field(Field.PER_FIELD_ANALYZER.getPreferredName(), this.perFieldAnalyzer);
             }
-            if (this.fetchSourceContext() != null) {
-                FetchSourceContext source = this.fetchSourceContext();
-                String[] includes = source.includes();
-                String[] excludes = source.excludes();
-                if (includes.length == 0 && excludes.length == 0) {
-                    builder.field("_source", source.fetchSource());
-                } else if (includes.length > 0 && excludes.length == 0) {
-                    builder.array("_source", source.includes());
-                } else if (excludes.length > 0) {
-                    builder.startObject("_source");
-                    if (includes.length > 0) {
-                        builder.array("includes", source.includes());
-                    }
-                    builder.array("excludes", source.excludes());
-                    builder.endObject();
-                }
+            if (this.routing != null) {
+                builder.field(Field.ROUTING.getPreferredName(), this.routing);
             }
-            if (this.version() != Versions.MATCH_ANY) {
-                builder.field("_version", this.version());
+            if (this.version != Versions.MATCH_ANY) {
+                builder.field(Field.VERSION.getPreferredName(), this.version);
             }
-            if (this.versionType() != VersionType.INTERNAL) {
-                builder.field("_version_type", this.versionType().toString().toLowerCase(Locale.ROOT));
+            if (this.versionType != VersionType.INTERNAL) {
+                builder.field(Field.VERSION_TYPE.getPreferredName(), this.versionType.toString().toLowerCase(Locale.ROOT));
             }
             return builder.endObject();
         }

+        @Override
+        public final String toString() {
+            try {
+                XContentBuilder builder = XContentFactory.jsonBuilder();
+                builder.prettyPrint();
+                toXContent(builder, EMPTY_PARAMS);
+                return builder.string();
+            } catch (Exception e) {
+                return "{ \"error\" : \"" + ExceptionsHelper.detailedMessage(e) + "\"}";
+            }
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(index, type, id, doc, Arrays.hashCode(fields), perFieldAnalyzer, routing,
+                    version, versionType);
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (!(o instanceof Item)) return false;
+            Item other = (Item) o;
+            return Objects.equals(index, other.index) &&
+                    Objects.equals(type, other.type) &&
+                    Objects.equals(id, other.id) &&
+                    Objects.equals(doc, other.doc) &&
+                    Arrays.equals(fields, other.fields) &&  // otherwise we are comparing pointers
+                    Objects.equals(perFieldAnalyzer, other.perFieldAnalyzer) &&
+                    Objects.equals(routing, other.routing) &&
+                    Objects.equals(version, other.version) &&
+                    Objects.equals(versionType, other.versionType);
+        }
     }

+    // document inputs
+    private List<String> likeTexts = new ArrayList<>();
+    private List<String> unlikeTexts = new ArrayList<>();
+    private List<Item> likeItems = new ArrayList<>();
+    private List<Item> unlikeItems = new ArrayList<>();
     private final String[] fields;
-    private List<Item> docs = new ArrayList<>();
-    private List<Item> unlikeDocs = new ArrayList<>();
-    private Boolean include = null;
-    private String minimumShouldMatch = null;
-    private int minTermFreq = -1;

+    // term selection parameters
     private int maxQueryTerms = -1;
-    private String[] stopWords = null;
+    private int minTermFreq = -1;
     private int minDocFreq = -1;
     private int maxDocFreq = -1;
     private int minWordLength = -1;
     private int maxWordLength = -1;
-    private float boostTerms = -1;
-    private float boost = -1;
+    private String[] stopWords = null;
     private String analyzer;

+    // query formation parameters
+    private String minimumShouldMatch = null;
+    private float boostTerms = -1;
+    private Boolean include = null;
+
+    // other parameters
     private Boolean failOnUnsupportedField;
+    private float boost = -1;
     private String queryName;

     /**
@@ -162,108 +388,71 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
         this.fields = fields;
     }

     /**
-     * Sets the documents to use in order to find documents that are "like" this.
-     *
-     * @param docs the documents to use when generating the 'More Like This' query.
-     */
-    public MoreLikeThisQueryBuilder like(Item... docs) {
-        this.docs = Arrays.asList(docs);
-        return this;
-    }
-
-    /**
      * Sets the text to use in order to find documents that are "like" this.
      *
-     * @param likeText the text to use when generating the 'More Like This' query.
+     * @param likeTexts the text to use when generating the 'More Like This' query.
      */
-    public MoreLikeThisQueryBuilder like(String... likeText) {
-        this.docs = new ArrayList<>();
-        for (String text : likeText) {
-            this.docs.add(new Item(text));
-        }
-        return this;
+    public MoreLikeThisQueryBuilder like(String... likeTexts) {
+        this.likeTexts = new ArrayList<>();
+        return addLikeText(likeTexts);
     }

     /**
-     * Sets the documents from which the terms should not be selected from.
+     * Sets the documents to use in order to find documents that are "like" this.
+     *
+     * @param likeItems the documents to use when generating the 'More Like This' query.
      */
-    public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
-        this.unlikeDocs = Arrays.asList(docs);
-        return this;
+    public MoreLikeThisQueryBuilder like(Item... likeItems) {
+        this.likeItems = new ArrayList<>();
+        return addLikeItem(likeItems);
     }

     /**
-     * Sets the text from which the terms should not be selected from.
+     * Adds some text to use in order to find documents that are "like" this.
      */
-    public MoreLikeThisQueryBuilder ignoreLike(String... likeText) {
-        this.unlikeDocs = new ArrayList<>();
-        for (String text : likeText) {
-            this.unlikeDocs.add(new Item(text));
-        }
+    public MoreLikeThisQueryBuilder addLikeText(String... likeTexts) {
+        Collections.addAll(this.likeTexts, likeTexts);
         return this;
     }

     /**
      * Adds a document to use in order to find documents that are "like" this.
      */
-    public MoreLikeThisQueryBuilder addItem(Item item) {
-        this.docs.add(item);
+    public MoreLikeThisQueryBuilder addLikeItem(Item... likeItems) {
+        Collections.addAll(this.likeItems, likeItems);
         return this;
     }

     /**
-     * Adds some text to use in order to find documents that are "like" this.
+     * Sets the text from which the terms should not be selected from.
      */
-    public MoreLikeThisQueryBuilder addLikeText(String likeText) {
-        this.docs.add(new Item(likeText));
+    public MoreLikeThisQueryBuilder unlike(String... unlikeTexts) {
+        this.unlikeTexts = new ArrayList<>();
+        return addUnlikeText(unlikeTexts);
     }

+    /**
+     * Sets the documents from which the terms should not be selected from.
+     */
+    public MoreLikeThisQueryBuilder unlike(Item... unlikeItems) {
+        this.unlikeItems = new ArrayList<>();
+        return addUnlikeItem(unlikeItems);
+    }
+
+    /**
+     * Adds some text to use in order to find documents that are "unlike" this.
+     */
+    public MoreLikeThisQueryBuilder addUnlikeText(String... unlikeTexts) {
+        Collections.addAll(this.unlikeTexts, unlikeTexts);
+        return this;
+    }
+
     /**
-     * The text to use in order to find documents that are "like" this.
+     * Adds a document to use in order to find documents that are "unlike" this.
      */
-    @Deprecated
-    public MoreLikeThisQueryBuilder likeText(String likeText) {
-        return like(likeText);
-    }
-
-    @Deprecated
-    public MoreLikeThisQueryBuilder ids(String... ids) {
-        Item[] items = new Item[ids.length];
-        for (int i = 0; i < items.length; i++) {
-            items[i] = new Item(null, null, ids[i]);
-        }
-        return like(items);
-    }
-
-    @Deprecated
-    public MoreLikeThisQueryBuilder docs(Item... docs) {
-        return like(docs);
-    }
-
-    public MoreLikeThisQueryBuilder include(boolean include) {
-        this.include = include;
-        return this;
-    }
-
-    /**
-     * Number of terms that must match the generated query expressed in the
-     * common syntax for minimum should match. Defaults to <tt>30%</tt>.
-     *
-     * @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
-     */
-    public MoreLikeThisQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
-        this.minimumShouldMatch = minimumShouldMatch;
-        return this;
-    }
-
-    /**
-     * The frequency below which terms will be ignored in the source doc. The default
-     * frequency is <tt>2</tt>.
-     */
-    public MoreLikeThisQueryBuilder minTermFreq(int minTermFreq) {
-        this.minTermFreq = minTermFreq;
+    public MoreLikeThisQueryBuilder addUnlikeItem(Item... unlikeItems) {
+        Collections.addAll(this.unlikeItems, unlikeItems);
         return this;
     }

@@ -277,14 +466,11 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
     }

     /**
-     * Set the set of stopwords.
-     * <p/>
-     * <p>Any word in this set is considered "uninteresting" and ignored. Even if your Analyzer allows stopwords, you
-     * might want to tell the MoreLikeThis code to ignore them, as for the purposes of document similarity it seems
-     * reasonable to assume that "a stop word is never interesting".
+     * The frequency below which terms will be ignored in the source doc. The default
+     * frequency is <tt>2</tt>.
      */
-    public MoreLikeThisQueryBuilder stopWords(String... stopWords) {
-        this.stopWords = stopWords;
+    public MoreLikeThisQueryBuilder minTermFreq(int minTermFreq) {
+        this.minTermFreq = minTermFreq;
         return this;
     }

@@ -325,10 +511,14 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
     }

     /**
-     * Sets the boost factor to use when boosting terms. Defaults to <tt>1</tt>.
+     * Set the set of stopwords.
+     * <p/>
+     * <p>Any word in this set is considered "uninteresting" and ignored. Even if your Analyzer allows stopwords, you
+     * might want to tell the MoreLikeThis code to ignore them, as for the purposes of document similarity it seems
+     * reasonable to assume that "a stop word is never interesting".
      */
-    public MoreLikeThisQueryBuilder boostTerms(float boostTerms) {
-        this.boostTerms = boostTerms;
+    public MoreLikeThisQueryBuilder stopWords(String... stopWords) {
+        this.stopWords = stopWords;
         return this;
     }

@@ -340,9 +530,30 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
         return this;
     }

-    @Override
-    public MoreLikeThisQueryBuilder boost(float boost) {
-        this.boost = boost;
+    /**
+     * Number of terms that must match the generated query expressed in the
+     * common syntax for minimum should match. Defaults to <tt>30%</tt>.
+     *
+     * @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
+     */
+    public MoreLikeThisQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
+        this.minimumShouldMatch = minimumShouldMatch;
+        return this;
+    }
+
+    /**
+     * Sets the boost factor to use when boosting terms. Defaults to <tt>1</tt>.
+     */
+    public MoreLikeThisQueryBuilder boostTerms(float boostTerms) {
+        this.boostTerms = boostTerms;
+        return this;
+    }
+
+    /**
+     * Whether to include the input documents. Defaults to <tt>false</tt>
+     */
+    public MoreLikeThisQueryBuilder include(boolean include) {
+        this.include = include;
         return this;
     }

@@ -354,6 +565,12 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
         return this;
     }

+    @Override
+    public MoreLikeThisQueryBuilder boost(float boost) {
+        this.boost = boost;
+        return this;
+    }
+
     /**
      * Sets the query name for the filter that can be used when searching for matched_filters per hit.
      */
@@ -362,71 +579,123 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ
         return this;
     }

+    /**
+     * The text to use in order to find documents that are "like" this.
+     */
+    @Deprecated
+    public MoreLikeThisQueryBuilder likeText(String likeText) {
+        return like(likeText);
+    }
+
+    @Deprecated
+    public MoreLikeThisQueryBuilder ids(String... ids) {
+        Item[] items = new Item[ids.length];
+        for (int i = 0; i < items.length; i++) {
+            items[i] = new Item(null, null, ids[i]);
+        }
+        return like(items);
+    }
+
+    @Deprecated
+    public MoreLikeThisQueryBuilder docs(Item... docs) {
+        return like(docs);
+    }
+
+    /**
+     * Sets the documents from which the terms should not be selected from.
+     *
+     * @Deprecated Use {@link #unlike(Item...)} instead
+     */
+    @Deprecated
+    public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
+        return unlike(docs);
+    }
+
+    /**
+     * Sets the text from which the terms should not be selected from.
+     *
+     * @Deprecated Use {@link #unlike(String...)} instead.
+     */
+    @Deprecated
+    public MoreLikeThisQueryBuilder ignoreLike(String... likeText) {
+        return unlike(likeText);
+    }
+
+    /**
+     * Adds a document to use in order to find documents that are "like" this.
+     */
+    @Deprecated
+    public MoreLikeThisQueryBuilder addItem(Item... likeItems) {
+        return addLikeItem(likeItems);
+    }
+
     @Override
     protected void doXContent(XContentBuilder builder, Params params) throws IOException {
-        String likeFieldName = MoreLikeThisQueryParser.Fields.LIKE.getPreferredName();
         builder.startObject(MoreLikeThisQueryParser.NAME);
         if (fields != null) {
-            builder.startArray("fields");
-            for (String field : fields) {
-                builder.value(field);
-            }
-            builder.endArray();
+            builder.field(MoreLikeThisQueryParser.Field.FIELDS.getPreferredName(), fields);
         }
-        if (this.docs.isEmpty()) {
-            throw new IllegalArgumentException("more_like_this requires '" + likeFieldName + "' to be provided");
+        if (this.likeTexts.isEmpty() && this.likeItems.isEmpty()) {
+            throw new IllegalArgumentException("more_like_this requires '" + MoreLikeThisQueryParser.Field.LIKE.getPreferredName() + "' to be provided");
         } else {
-            builder.field(likeFieldName, docs);
+            buildLikeField(builder, MoreLikeThisQueryParser.Field.LIKE.getPreferredName(), likeTexts, likeItems);
         }
-        if (!unlikeDocs.isEmpty()) {
-            builder.field(MoreLikeThisQueryParser.Fields.UNLIKE.getPreferredName(), unlikeDocs);
-        }
-        if (minimumShouldMatch != null) {
-            builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
-        }
-        if (minTermFreq != -1) {
-            builder.field(MoreLikeThisQueryParser.Fields.MIN_TERM_FREQ.getPreferredName(), minTermFreq);
+        if (!unlikeTexts.isEmpty() || !unlikeItems.isEmpty()) {
+            buildLikeField(builder, MoreLikeThisQueryParser.Field.UNLIKE.getPreferredName(), unlikeTexts, unlikeItems);
         }
         if (maxQueryTerms != -1) {
-            builder.field(MoreLikeThisQueryParser.Fields.MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms);
+            builder.field(MoreLikeThisQueryParser.Field.MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms);
         }
-        if (stopWords != null && stopWords.length > 0) {
-            builder.startArray(MoreLikeThisQueryParser.Fields.STOP_WORDS.getPreferredName());
-            for (String stopWord : stopWords) {
-                builder.value(stopWord);
-            }
-            builder.endArray();
+        if (minTermFreq != -1) {
+            builder.field(MoreLikeThisQueryParser.Field.MIN_TERM_FREQ.getPreferredName(), minTermFreq);
         }
         if (minDocFreq != -1) {
-            builder.field(MoreLikeThisQueryParser.Fields.MIN_DOC_FREQ.getPreferredName(), minDocFreq);
+            builder.field(MoreLikeThisQueryParser.Field.MIN_DOC_FREQ.getPreferredName(), minDocFreq);
         }
         if (maxDocFreq != -1) {
-            builder.field(MoreLikeThisQueryParser.Fields.MAX_DOC_FREQ.getPreferredName(), maxDocFreq);
+            builder.field(MoreLikeThisQueryParser.Field.MAX_DOC_FREQ.getPreferredName(), maxDocFreq);
         }
         if (minWordLength != -1) {
-            builder.field(MoreLikeThisQueryParser.Fields.MIN_WORD_LENGTH.getPreferredName(), minWordLength);
+            builder.field(MoreLikeThisQueryParser.Field.MIN_WORD_LENGTH.getPreferredName(), minWordLength);
         }
         if (maxWordLength != -1) {
-            builder.field(MoreLikeThisQueryParser.Fields.MAX_WORD_LENGTH.getPreferredName(), maxWordLength);
+            builder.field(MoreLikeThisQueryParser.Field.MAX_WORD_LENGTH.getPreferredName(), maxWordLength);
         }
+        if (stopWords != null && stopWords.length > 0) {
+            builder.field(MoreLikeThisQueryParser.Field.STOP_WORDS.getPreferredName(), stopWords);
+        }
+        if (analyzer != null) {
+            builder.field(MoreLikeThisQueryParser.Field.ANALYZER.getPreferredName(), analyzer);
+        }
+        if (minimumShouldMatch != null) {
+            builder.field(MoreLikeThisQueryParser.Field.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
+        }
         if (boostTerms != -1) {
-            builder.field(MoreLikeThisQueryParser.Fields.BOOST_TERMS.getPreferredName(), boostTerms);
+            builder.field(MoreLikeThisQueryParser.Field.BOOST_TERMS.getPreferredName(), boostTerms);
         }
+        if (include != null) {
+            builder.field(MoreLikeThisQueryParser.Field.INCLUDE.getPreferredName(), include);
+        }
+        if (failOnUnsupportedField != null) {
+            builder.field(MoreLikeThisQueryParser.Field.FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField);
+        }
         if (boost != -1) {
             builder.field("boost", boost);
         }
-        if (analyzer != null) {
-            builder.field("analyzer", analyzer);
-        }
-        if (failOnUnsupportedField != null) {
-            builder.field(MoreLikeThisQueryParser.Fields.FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField);
-        }
         if (queryName != null) {
             builder.field("_name", queryName);
         }
-        if (include != null) {
-            builder.field("include", include);
-        }
         builder.endObject();
     }

+    private static void buildLikeField(XContentBuilder builder, String fieldName, List<String> texts, List<Item> items) throws IOException {
+        builder.startArray(fieldName);
+        for (String text : texts) {
+            builder.value(text);
+        }
+        for (Item item : items) {
+            builder.value(item);
+        }
+        builder.endArray();
+    }
 }
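The Item above is what both the builder and the parser now hand to the fetch service. A minimal sketch of its two flavors follows, assuming hypothetical index, type, id and routing values; it uses only the constructors and methods introduced in this diff:

import java.io.IOException;

import org.elasticsearch.action.termvectors.TermVectorsRequest;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

public class MltItemSketch {
    public static TermVectorsRequest forIndexedDoc() {
        // An item pointing at a document that already lives in the index.
        Item indexed = new Item("my-index", "my-type", "1").routing("shard-key");  // values are hypothetical
        // Builder and parser items fetch their terms through the same conversion.
        return indexed.toTermVectorsRequest();
    }

    public static Item forArtificialDoc() throws IOException {
        // An artificial document: parsed for its terms but never indexed.
        return new Item("my-index", "my-type",
                jsonBuilder().startObject().field("title", "some text").endObject());
    }
}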
@@ -26,9 +26,7 @@ import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.action.termvectors.MultiTermVectorsRequest;
 import org.elasticsearch.action.termvectors.MultiTermVectorsResponse;
-import org.elasticsearch.action.termvectors.TermVectorsRequest;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.Strings;
@@ -38,44 +36,44 @@ import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.index.analysis.Analysis;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.internal.UidFieldMapper;
+import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
 import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
 import org.elasticsearch.search.internal.SearchContext;

 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Set;
+import java.util.*;

 import static org.elasticsearch.index.mapper.Uid.createUidAsBytes;

 /**
+ * Parser for the The More Like This Query (MLT Query) which finds documents that are "like" a given set of documents.
+ *
+ * The documents are provided as a set of strings and/or a list of {@link Item}.
  */
 public class MoreLikeThisQueryParser implements QueryParser {

     public static final String NAME = "mlt";
     private MoreLikeThisFetchService fetchService = null;

-    public static class Fields {
-        public static final ParseField LIKE_TEXT = new ParseField("like_text").withAllDeprecated("like");
-        public static final ParseField MIN_TERM_FREQ = new ParseField("min_term_freq");
-        public static final ParseField MAX_QUERY_TERMS = new ParseField("max_query_terms");
-        public static final ParseField MIN_WORD_LENGTH = new ParseField("min_word_length", "min_word_len");
-        public static final ParseField MAX_WORD_LENGTH = new ParseField("max_word_length", "max_word_len");
-        public static final ParseField MIN_DOC_FREQ = new ParseField("min_doc_freq");
-        public static final ParseField MAX_DOC_FREQ = new ParseField("max_doc_freq");
-        public static final ParseField BOOST_TERMS = new ParseField("boost_terms");
-        public static final ParseField MINIMUM_SHOULD_MATCH = new ParseField("minimum_should_match");
-        public static final ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field");
-        public static final ParseField STOP_WORDS = new ParseField("stop_words");
-        public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like");
-        public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like");
-        public static final ParseField LIKE = new ParseField("like");
-        public static final ParseField UNLIKE = new ParseField("unlike");
-        public static final ParseField INCLUDE = new ParseField("include");
+    public interface Field {
+        ParseField FIELDS = new ParseField("fields");
+        ParseField LIKE = new ParseField("like");
+        ParseField UNLIKE = new ParseField("unlike");
+        ParseField LIKE_TEXT = new ParseField("like_text").withAllDeprecated("like");
+        ParseField IDS = new ParseField("ids").withAllDeprecated("like");
+        ParseField DOCS = new ParseField("docs").withAllDeprecated("like");
+        ParseField MAX_QUERY_TERMS = new ParseField("max_query_terms");
+        ParseField MIN_TERM_FREQ = new ParseField("min_term_freq");
+        ParseField MIN_DOC_FREQ = new ParseField("min_doc_freq");
+        ParseField MAX_DOC_FREQ = new ParseField("max_doc_freq");
+        ParseField MIN_WORD_LENGTH = new ParseField("min_word_length", "min_word_len");
+        ParseField MAX_WORD_LENGTH = new ParseField("max_word_length", "max_word_len");
+        ParseField STOP_WORDS = new ParseField("stop_words");
+        ParseField ANALYZER = new ParseField("analyzer");
+        ParseField MINIMUM_SHOULD_MATCH = new ParseField("minimum_should_match");
+        ParseField BOOST_TERMS = new ParseField("boost_terms");
+        ParseField INCLUDE = new ParseField("include");
+        ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field");
     }

     public MoreLikeThisQueryParser() {
@@ -98,109 +96,108 @@ public class MoreLikeThisQueryParser implements QueryParser {

         MoreLikeThisQuery mltQuery = new MoreLikeThisQuery();
         mltQuery.setSimilarity(parseContext.searchSimilarity());
-        Analyzer analyzer = null;

+        List<String> likeTexts = new ArrayList<>();
+        List<String> unlikeTexts = new ArrayList<>();
+        List<Item> likeItems = new ArrayList<>();
+        List<Item> unlikeItems = new ArrayList<>();
+
         List<String> moreLikeFields = null;
+        Analyzer analyzer = null;
+        boolean include = false;

         boolean failOnUnsupportedField = true;
         String queryName = null;
-        boolean include = false;

         XContentParser.Token token;
         String currentFieldName = null;
-
-        List<String> likeTexts = new ArrayList<>();
-        MultiTermVectorsRequest likeItems = new MultiTermVectorsRequest();
-
-        List<String> unlikeTexts = new ArrayList<>();
-        MultiTermVectorsRequest unlikeItems = new MultiTermVectorsRequest();
-
         while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
             if (token == XContentParser.Token.FIELD_NAME) {
                 currentFieldName = parser.currentName();
             } else if (token.isValue()) {
-                if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE_TEXT)) {
+                if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE)) {
+                    parseLikeField(parseContext, likeTexts, likeItems);
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.UNLIKE)) {
+                    parseLikeField(parseContext, unlikeTexts, unlikeItems);
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE_TEXT)) {
                     likeTexts.add(parser.text());
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE)) {
-                    parseLikeField(parser, likeTexts, likeItems);
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.UNLIKE)) {
-                    parseLikeField(parser, unlikeTexts, unlikeItems);
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MIN_TERM_FREQ)) {
-                    mltQuery.setMinTermFrequency(parser.intValue());
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MAX_QUERY_TERMS)) {
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_QUERY_TERMS)) {
                     mltQuery.setMaxQueryTerms(parser.intValue());
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MIN_DOC_FREQ)) {
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_TERM_FREQ)) {
+                    mltQuery.setMinTermFrequency(parser.intValue());
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_DOC_FREQ)) {
                     mltQuery.setMinDocFreq(parser.intValue());
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MAX_DOC_FREQ)) {
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_DOC_FREQ)) {
                     mltQuery.setMaxDocFreq(parser.intValue());
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MIN_WORD_LENGTH)) {
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_WORD_LENGTH)) {
                     mltQuery.setMinWordLen(parser.intValue());
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MAX_WORD_LENGTH)) {
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_WORD_LENGTH)) {
                     mltQuery.setMaxWordLen(parser.intValue());
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.BOOST_TERMS)) {
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.ANALYZER)) {
+                    analyzer = parseContext.analysisService().analyzer(parser.text());
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MINIMUM_SHOULD_MATCH)) {
+                    mltQuery.setMinimumShouldMatch(parser.text());
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.BOOST_TERMS)) {
                     float boostFactor = parser.floatValue();
                     if (boostFactor != 0) {
                         mltQuery.setBoostTerms(true);
                         mltQuery.setBoostTermsFactor(boostFactor);
                     }
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MINIMUM_SHOULD_MATCH)) {
-                    mltQuery.setMinimumShouldMatch(parser.text());
-                } else if ("analyzer".equals(currentFieldName)) {
-                    analyzer = parseContext.analysisService().analyzer(parser.text());
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.INCLUDE)) {
+                    include = parser.booleanValue();
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.FAIL_ON_UNSUPPORTED_FIELD)) {
+                    failOnUnsupportedField = parser.booleanValue();
                 } else if ("boost".equals(currentFieldName)) {
                     mltQuery.setBoost(parser.floatValue());
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.FAIL_ON_UNSUPPORTED_FIELD)) {
-                    failOnUnsupportedField = parser.booleanValue();
                 } else if ("_name".equals(currentFieldName)) {
                     queryName = parser.text();
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.INCLUDE)) {
-                    include = parser.booleanValue();
                 } else {
                     throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
                 }
             } else if (token == XContentParser.Token.START_ARRAY) {
-                if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.STOP_WORDS)) {
-                    Set<String> stopWords = Sets.newHashSet();
-                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                        stopWords.add(parser.text());
-                    }
-                    mltQuery.setStopWords(stopWords);
-                } else if ("fields".equals(currentFieldName)) {
+                if (parseContext.parseFieldMatcher().match(currentFieldName, Field.FIELDS)) {
                     moreLikeFields = new LinkedList<>();
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                         String field = parser.text();
                         MappedFieldType fieldType = parseContext.fieldMapper(field);
                         moreLikeFields.add(fieldType == null ? field : fieldType.names().indexName());
                     }
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.DOCUMENT_IDS)) {
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE)) {
+                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                        parseLikeField(parseContext, likeTexts, likeItems);
+                    }
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.UNLIKE)) {
+                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                        parseLikeField(parseContext, unlikeTexts, unlikeItems);
+                    }
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.IDS)) {
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                         if (!token.isValue()) {
                             throw new IllegalArgumentException("ids array element should only contain ids");
                         }
-                        likeItems.add(newTermVectorsRequest().id(parser.text()));
+                        likeItems.add(new Item(null, null, parser.text()));
                     }
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.DOCUMENTS)) {
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.DOCS)) {
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                         if (token != XContentParser.Token.START_OBJECT) {
                             throw new IllegalArgumentException("docs array element should include an object");
                         }
-                        likeItems.add(parseDocument(parser));
+                        likeItems.add(Item.parse(parser, parseContext.parseFieldMatcher(), new Item()));
                     }
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE)) {
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.STOP_WORDS)) {
+                    Set<String> stopWords = Sets.newHashSet();
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                        parseLikeField(parser, likeTexts, likeItems);
-                    }
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.UNLIKE)) {
-                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                        parseLikeField(parser, unlikeTexts, unlikeItems);
+                        stopWords.add(parser.text());
                     }
+                    mltQuery.setStopWords(stopWords);
                 } else {
                     throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
                 }
             } else if (token == XContentParser.Token.START_OBJECT) {
-                if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE)) {
-                    parseLikeField(parser, likeTexts, likeItems);
-                }
-                else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.UNLIKE)) {
-                    parseLikeField(parser, unlikeTexts, unlikeItems);
+                if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE)) {
+                    parseLikeField(parseContext, likeTexts, likeItems);
+                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.UNLIKE)) {
+                    parseLikeField(parseContext, unlikeTexts, unlikeItems);
                 } else {
                     throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
                 }
@@ -225,6 +222,7 @@ public class MoreLikeThisQueryParser implements QueryParser {
         if (useDefaultField) {
             moreLikeFields = Collections.singletonList(parseContext.defaultField());
         }
+
         // possibly remove unsupported fields
         removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField);
         if (moreLikeFields.isEmpty()) {
@@ -242,93 +240,29 @@ public class MoreLikeThisQueryParser implements QueryParser {
             mltQuery.setLikeText(likeTexts);
         }
         if (!unlikeTexts.isEmpty()) {
-            mltQuery.setIgnoreText(unlikeTexts);
+            mltQuery.setUnlikeText(unlikeTexts);
         }

         // handle items
         if (!likeItems.isEmpty()) {
-            // set default index, type and fields if not specified
-            MultiTermVectorsRequest items = likeItems;
-            for (TermVectorsRequest item : unlikeItems) {
-                items.add(item);
-            }
-
-            for (TermVectorsRequest item : items) {
-                if (item.index() == null) {
-                    item.index(parseContext.index().name());
-                }
-                if (item.type() == null) {
-                    if (parseContext.queryTypes().size() > 1) {
-                        throw new QueryParsingException(parseContext,
-                                "ambiguous type for item with id: " + item.id()
-                                        + " and index: " + item.index());
-                    } else {
-                        item.type(parseContext.queryTypes().iterator().next());
-                    }
-                }
-                // default fields if not present but don't override for artificial docs
-                if (item.selectedFields() == null && item.doc() == null) {
-                    if (useDefaultField) {
-                        item.selectedFields("*");
-                    } else {
-                        item.selectedFields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
-                    }
-                }
-            }
-            // fetching the items with multi-termvectors API
-            items.copyContextAndHeadersFrom(SearchContext.current());
-            MultiTermVectorsResponse responses = fetchService.fetchResponse(items);
-
-            // getting the Fields for liked items
-            mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems));
-
-            // getting the Fields for ignored items
-            if (!unlikeItems.isEmpty()) {
-                org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, unlikeItems);
-                if (ignoreFields.length > 0) {
-                    mltQuery.setUnlikeText(ignoreFields);
-                }
-            }
-
-            BooleanQuery.Builder boolQuery = new BooleanQuery.Builder();
-            boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
-
-            // exclude the items from the search
-            if (!include) {
-                handleExclude(boolQuery, likeItems);
-            }
-            return boolQuery.build();
+            return handleItems(parseContext, mltQuery, likeItems, unlikeItems, include, moreLikeFields, useDefaultField);
+        } else {
+            return mltQuery;
         }
-
-        return mltQuery;
     }

-    private TermVectorsRequest parseDocument(XContentParser parser) throws IOException {
-        TermVectorsRequest termVectorsRequest = newTermVectorsRequest();
-        TermVectorsRequest.parseRequest(termVectorsRequest, parser);
-        return termVectorsRequest;
-    }
-
-    private void parseLikeField(XContentParser parser, List<String> likeTexts, MultiTermVectorsRequest items) throws IOException {
+    private static void parseLikeField(QueryParseContext parseContext, List<String> texts, List<Item> items) throws IOException {
+        XContentParser parser = parseContext.parser();
         if (parser.currentToken().isValue()) {
-            likeTexts.add(parser.text());
+            texts.add(parser.text());
         } else if (parser.currentToken() == XContentParser.Token.START_OBJECT) {
-            items.add(parseDocument(parser));
+            items.add(Item.parse(parser, parseContext.parseFieldMatcher(), new Item()));
         } else {
             throw new IllegalArgumentException("Content of 'like' parameter should either be a string or an object");
         }
     }

-    private TermVectorsRequest newTermVectorsRequest() {
-        return new TermVectorsRequest()
-                .positions(false)
-                .offsets(false)
-                .payloads(false)
-                .fieldStatistics(false)
-                .termStatistics(false);
-    }
-
-    private List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
+    private static List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
         for (Iterator<String> it = moreLikeFields.iterator(); it.hasNext(); ) {
             final String fieldName = it.next();
             if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
@@ -342,10 +276,67 @@ public class MoreLikeThisQueryParser implements QueryParser {
         return moreLikeFields;
     }

-    private void handleExclude(BooleanQuery.Builder boolQuery, MultiTermVectorsRequest likeItems) {
+    private Query handleItems(QueryParseContext parseContext, MoreLikeThisQuery mltQuery, List<Item> likeItems, List<Item> unlikeItems,
+                              boolean include, List<String> moreLikeFields, boolean useDefaultField) throws IOException {
+        // set default index, type and fields if not specified
+        for (Item item : likeItems) {
+            setDefaultIndexTypeFields(parseContext, item, moreLikeFields, useDefaultField);
+        }
+        for (Item item : unlikeItems) {
+            setDefaultIndexTypeFields(parseContext, item, moreLikeFields, useDefaultField);
+        }
+
+        // fetching the items with multi-termvectors API
+        MultiTermVectorsResponse responses = fetchService.fetchResponse(likeItems, unlikeItems, SearchContext.current());
+
+        // getting the Fields for liked items
+        mltQuery.setLikeText(MoreLikeThisFetchService.getFieldsFor(responses, likeItems));
+
+        // getting the Fields for unliked items
+        if (!unlikeItems.isEmpty()) {
+            org.apache.lucene.index.Fields[] unlikeFields = MoreLikeThisFetchService.getFieldsFor(responses, unlikeItems);
+            if (unlikeFields.length > 0) {
+                mltQuery.setUnlikeText(unlikeFields);
+            }
+        }
+
+        BooleanQuery boolQuery = new BooleanQuery();
+        boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
+
+        // exclude the items from the search
+        if (!include) {
+            handleExclude(boolQuery, likeItems);
+        }
+        return boolQuery;
+    }
+
+    private static void setDefaultIndexTypeFields(QueryParseContext parseContext, Item item, List<String> moreLikeFields,
+                                                  boolean useDefaultField) {
+        if (item.index() == null) {
+            item.index(parseContext.index().name());
+        }
+        if (item.type() == null) {
+            if (parseContext.queryTypes().size() > 1) {
+                throw new QueryParsingException(parseContext,
+                        "ambiguous type for item with id: " + item.id() + " and index: " + item.index());
+            } else {
+                item.type(parseContext.queryTypes().iterator().next());
+            }
+        }
+        // default fields if not present but don't override for artificial docs
+        if ((item.fields() == null || item.fields().length == 0) && item.doc() == null) {
+            if (useDefaultField) {
+                item.fields("*");
+            } else {
+                item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
+            }
+        }
+    }
+
+    private static void handleExclude(BooleanQuery boolQuery, List<Item> likeItems) {
         // artificial docs get assigned a random id and should be disregarded
         List<BytesRef> uids = new ArrayList<>();
-        for (TermVectorsRequest item : likeItems) {
+        for (Item item : likeItems) {
             if (item.doc() != null) {
                 continue;
             }
@@ -20,12 +20,17 @@
 package org.elasticsearch.index.search.morelikethis;

 import org.apache.lucene.index.Fields;
-import org.elasticsearch.action.termvectors.*;
+import org.elasticsearch.action.termvectors.MultiTermVectorsItemResponse;
+import org.elasticsearch.action.termvectors.MultiTermVectorsRequest;
+import org.elasticsearch.action.termvectors.MultiTermVectorsResponse;
+import org.elasticsearch.action.termvectors.TermVectorsResponse;
 import org.elasticsearch.client.Client;
+import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
+import org.elasticsearch.search.internal.SearchContext;

 import java.io.IOException;
 import java.util.ArrayList;
@@ -46,24 +51,35 @@ public class MoreLikeThisFetchService extends AbstractComponent {
         this.client = client;
     }

-    public Fields[] fetch(MultiTermVectorsRequest requests) throws IOException {
-        return getFields(fetchResponse(requests), requests);
+    public Fields[] fetch(List<Item> items) throws IOException {
+        return getFieldsFor(fetchResponse(items, null, SearchContext.current()), items);
     }

-    public MultiTermVectorsResponse fetchResponse(MultiTermVectorsRequest requests) throws IOException {
-        return client.multiTermVectors(requests).actionGet();
+    public MultiTermVectorsResponse fetchResponse(List<Item> likeItems, @Nullable List<Item> unlikeItems,
+                                                  SearchContext searchContext) throws IOException {
+        MultiTermVectorsRequest request = new MultiTermVectorsRequest();
+        for (Item item : likeItems) {
+            request.add(item.toTermVectorsRequest());
+        }
+        if (unlikeItems != null) {
+            for (Item item : unlikeItems) {
+                request.add(item.toTermVectorsRequest());
+            }
+        }
+        request.copyContextAndHeadersFrom(searchContext);
+        return client.multiTermVectors(request).actionGet();
     }

-    public static Fields[] getFields(MultiTermVectorsResponse responses, MultiTermVectorsRequest requests) throws IOException {
+    public static Fields[] getFieldsFor(MultiTermVectorsResponse responses, List<Item> items) throws IOException {
         List<Fields> likeFields = new ArrayList<>();

-        Set<Item> items = new HashSet<>();
-        for (TermVectorsRequest request : requests) {
-            items.add(new Item(request.index(), request.type(), request.id()));
+        Set<Item> selectedItems = new HashSet<>();
+        for (Item request : items) {
+            selectedItems.add(new Item(request.index(), request.type(), request.id()));
         }

         for (MultiTermVectorsItemResponse response : responses) {
-            if (!hasResponseFromRequest(response, items)) {
+            if (!hasResponseFromRequest(response, selectedItems)) {
                 continue;
             }
             if (response.isFailed()) {
@ -78,7 +94,7 @@ public class MoreLikeThisFetchService extends AbstractComponent {
|
|||
return likeFields.toArray(Fields.EMPTY_ARRAY);
|
||||
}
|
||||
|
||||
private static boolean hasResponseFromRequest(MultiTermVectorsItemResponse response, Set<Item> items) {
|
||||
return items.contains(new Item(response.getIndex(), response.getType(), response.getId()));
|
||||
private static boolean hasResponseFromRequest(MultiTermVectorsItemResponse response, Set<Item> selectedItems) {
|
||||
return selectedItems.contains(new Item(response.getIndex(), response.getType(), response.getId()));
|
||||
}
|
||||
}
|
||||
|
|
|
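With the new signatures the builder hands over parsed Items and the fetch service assembles the MultiTermVectorsRequest itself. A hypothetical call site, using only the methods shown in the hunk above (variable names are illustrative):

    // fetch term vectors for one liked document; no unlike items in this sketch
    List<Item> likeItems = Arrays.asList(new Item("index", "type", "1"));
    MultiTermVectorsResponse response =
            fetchService.fetchResponse(likeItems, null, SearchContext.current());
    Fields[] likeFields = MoreLikeThisFetchService.getFieldsFor(response, likeItems);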
@ -28,6 +28,7 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.ExtensionPoint;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryParser;
import org.elasticsearch.index.query.MoreLikeThisQueryParser;
import org.elasticsearch.indices.analysis.HunspellService;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.elasticsearch.indices.cache.query.IndicesQueryCache;
@ -21,41 +21,15 @@ package org.elasticsearch.index.query;

import com.google.common.collect.Sets;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.*;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.BoostingQuery;
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanContainingQuery;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWithinQuery;
import org.apache.lucene.search.spans.*;
import org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

@ -63,12 +37,10 @@ import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
import org.elasticsearch.action.termvectors.MultiTermVectorsItemResponse;
import org.elasticsearch.action.termvectors.MultiTermVectorsRequest;
import org.elasticsearch.action.termvectors.MultiTermVectorsResponse;
import org.elasticsearch.action.termvectors.TermVectorsRequest;
import org.elasticsearch.action.termvectors.TermVectorsResponse;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;

@ -87,6 +59,7 @@ import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import org.elasticsearch.index.search.geo.GeoDistanceRangeQuery;
import org.elasticsearch.index.search.geo.GeoPolygonQuery;

@ -99,10 +72,7 @@ import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Locale;
import java.util.*;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.*;
@ -1772,15 +1742,15 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase {
        }

        @Override
        public MultiTermVectorsResponse fetchResponse(MultiTermVectorsRequest items) throws IOException {
        public MultiTermVectorsResponse fetchResponse(List<Item> items, List<Item> unlikeItems, SearchContext searchContext) throws IOException {
            MultiTermVectorsItemResponse[] responses = new MultiTermVectorsItemResponse[items.size()];
            int i = 0;
            for (TermVectorsRequest item : items) {
            for (Item item : items) {
                TermVectorsResponse response = new TermVectorsResponse(item.index(), item.type(), item.id());
                response.setExists(true);
                Fields generatedFields = generateFields(item.selectedFields().toArray(Strings.EMPTY_ARRAY), item.id());
                Fields generatedFields = generateFields(item.fields(), item.id());
                EnumSet<TermVectorsRequest.Flag> flags = EnumSet.of(TermVectorsRequest.Flag.Positions, TermVectorsRequest.Flag.Offsets);
                response.setFields(generatedFields, item.selectedFields(), flags, generatedFields);
                response.setFields(generatedFields, new HashSet<String>(Arrays.asList(item.fields())), flags, generatedFields);
                responses[i++] = new MultiTermVectorsItemResponse(response, null);
            }
            return new MultiTermVectorsResponse(responses);
@ -20,71 +20,28 @@
package org.elasticsearch.search.morelikethis;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.elasticsearch.action.get.MultiGetRequest;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import org.elasticsearch.search.fetch.source.FetchSourceContext;
import org.elasticsearch.test.ESTestCase;
import org.junit.Test;

import java.io.IOException;
import java.util.List;
import java.util.Random;

import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
import static org.hamcrest.Matchers.is;

public class ItemSerializationTests extends ESTestCase {

    private Item generateRandomItem(int arraySize, int stringSize) {
        String index = randomAsciiOfLength(stringSize);
        String type = randomAsciiOfLength(stringSize);
        String id = String.valueOf(Math.abs(randomInt()));
        String routing = randomBoolean() ? randomAsciiOfLength(stringSize) : null;
        String[] fields = generateRandomStringArray(arraySize, stringSize, true);

        String routing = randomBoolean() ? randomAsciiOfLength(stringSize) : null;
        long version = Math.abs(randomLong());
        VersionType versionType = RandomPicks.randomFrom(new Random(), VersionType.values());

        FetchSourceContext fetchSourceContext;
        switch (randomIntBetween(0, 3)) {
            case 0 :
                fetchSourceContext = new FetchSourceContext(randomBoolean());
                break;
            case 1 :
                fetchSourceContext = new FetchSourceContext(generateRandomStringArray(arraySize, stringSize, true));
                break;
            case 2 :
                fetchSourceContext = new FetchSourceContext(generateRandomStringArray(arraySize, stringSize, true),
                        generateRandomStringArray(arraySize, stringSize, true));
                break;
            default:
                fetchSourceContext = null;
                break;
        }
        return (Item) new Item(index, type, id).routing(routing).fields(fields).version(version).versionType(versionType)
                .fetchSourceContext(fetchSourceContext);
    }

    private String ItemToJSON(Item item) throws IOException {
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        builder.startArray("docs");
        item.toXContent(builder, ToXContent.EMPTY_PARAMS);
        builder.endArray();
        builder.endObject();
        return XContentHelper.convertToJson(builder.bytes(), false);
    }

    private MultiGetRequest.Item JSONtoItem(String json) throws Exception {
        MultiGetRequest request = new MultiGetRequest().add(null, null, null, null, new BytesArray(json), true);
        return request.getItems().get(0);
        return new Item(index, type, id).fields(fields).routing(routing).version(version).versionType(versionType);
    }

    @Test

@ -94,66 +51,10 @@ public class ItemSerializationTests extends ESTestCase {
        int maxStringSize = 8;
        for (int i = 0; i < numOfTrials; i++) {
            Item item1 = generateRandomItem(maxArraySize, maxStringSize);
            String json = ItemToJSON(item1);
            MultiGetRequest.Item item2 = JSONtoItem(json);
            String json = item1.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS).string();
            XContentParser parser = XContentFactory.xContent(json).createParser(json);
            Item item2 = Item.parse(parser, ParseFieldMatcher.STRICT, new Item());
            assertEquals(item1, item2);
        }
    }

    private List<MultiGetRequest.Item> testItemsFromJSON(String json) throws Exception {
        MultiGetRequest request = new MultiGetRequest();
        request.add(null, null, null, null, new BytesArray(json), true);
        List<MultiGetRequest.Item> items = request.getItems();

        assertEquals(items.size(), 3);
        for (MultiGetRequest.Item item : items) {
            assertThat(item.index(), is("test"));
            assertThat(item.type(), is("type"));
            FetchSourceContext fetchSource = item.fetchSourceContext();
            switch (item.id()) {
                case "1" :
                    assertThat(fetchSource.fetchSource(), is(false));
                    break;
                case "2" :
                    assertThat(fetchSource.fetchSource(), is(true));
                    assertThat(fetchSource.includes(), is(new String[]{"field3", "field4"}));
                    break;
                case "3" :
                    assertThat(fetchSource.fetchSource(), is(true));
                    assertThat(fetchSource.includes(), is(new String[]{"user"}));
                    assertThat(fetchSource.excludes(), is(new String[]{"user.location"}));
                    break;
                default:
                    fail("item with id: " + item.id() + " is not 1, 2 or 3");
                    break;
            }
        }
        return items;
    }

    @Test
    public void testSimpleItemSerializationFromFile() throws Exception {
        // test items from JSON
        List<MultiGetRequest.Item> itemsFromJSON = testItemsFromJSON(
                copyToStringFromClasspath("/org/elasticsearch/search/morelikethis/items.json"));

        // create builder from items
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        builder.startArray("docs");
        for (MultiGetRequest.Item item : itemsFromJSON) {
            MoreLikeThisQueryBuilder.Item itemForBuilder = (MoreLikeThisQueryBuilder.Item) new MoreLikeThisQueryBuilder.Item(
                    item.index(), item.type(), item.id())
                    .fetchSourceContext(item.fetchSourceContext())
                    .fields(item.fields());
            itemForBuilder.toXContent(builder, ToXContent.EMPTY_PARAMS);
        }
        builder.endArray();
        builder.endObject();

        // verify generated JSON lead to the same items
        String json = XContentHelper.convertToJson(builder.bytes(), false);
        testItemsFromJSON(json);
    }

}
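The surviving test distills the old multi-step JSON fixture into a direct round trip. Outside the test harness the same cycle looks like this (a condensed sketch of exactly the calls used above, for an Item with only index, type and id set):

    Item original = new Item("test", "type1", "1");
    String json = original.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS).string();
    XContentParser parser = XContentFactory.xContent(json).createParser(json);
    Item roundTripped = Item.parse(parser, ParseFieldMatcher.STRICT, new Item());
    assert original.equals(roundTripped); // serialization and parsing agree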
@ -43,8 +43,8 @@ import static org.elasticsearch.client.Requests.*;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.index.query.QueryBuilders.moreLikeThisQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
@ -57,11 +57,11 @@ public class MoreLikeThisIT extends ESIntegTestCase {
    @Test
    public void testSimpleMoreLikeThis() throws Exception {
        logger.info("Creating index test");
        assertAcked(prepareCreate("test").addMapping("type1",
        assertAcked(prepareCreate("test").addMapping("type1",
                jsonBuilder().startObject().startObject("type1").startObject("properties")
                        .startObject("text").field("type", "string").endObject()
                        .endObject().endObject().endObject()));

                .startObject("text").field("type", "string").endObject()
                .endObject().endObject().endObject()));

        logger.info("Running Cluster Health");
        assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

@ -72,11 +72,10 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        logger.info("Running moreLikeThis");
        SearchResponse response = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 1l);
    }

    @Test
    public void testSimpleMoreLikeOnLongField() throws Exception {
        logger.info("Creating index test");

@ -89,23 +88,21 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        client().index(indexRequest("test").type("type2").id("2").source(jsonBuilder().startObject().field("some_long", 0).endObject())).actionGet();
        client().index(indexRequest("test").type("type1").id("3").source(jsonBuilder().startObject().field("some_long", -666).endObject())).actionGet();

        client().admin().indices().refresh(refreshRequest()).actionGet();

        logger.info("Running moreLikeThis");
        SearchResponse response = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 0l);
    }

    @Test
    public void testMoreLikeThisWithAliases() throws Exception {
        logger.info("Creating index test");
        assertAcked(prepareCreate("test").addMapping("type1",
        assertAcked(prepareCreate("test").addMapping("type1",
                jsonBuilder().startObject().startObject("type1").startObject("properties")
                        .startObject("text").field("type", "string").endObject()
                        .endObject().endObject().endObject()));
                .startObject("text").field("type", "string").endObject()
                .endObject().endObject().endObject()));
        logger.info("Creating aliases alias release");
        client().admin().indices().aliases(indexAliasesRequest().addAlias("release", termQuery("text", "release"), "test")).actionGet();
        client().admin().indices().aliases(indexAliasesRequest().addAlias("beta", termQuery("text", "beta"), "test")).actionGet();

@ -122,27 +119,26 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        logger.info("Running moreLikeThis on index");
        SearchResponse response = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 2l);

        logger.info("Running moreLikeThis on beta shard");
        response = client().prepareSearch("beta").setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 1l);
        assertThat(response.getHits().getAt(0).id(), equalTo("3"));

        logger.info("Running moreLikeThis on release shard");
        response = client().prepareSearch("release").setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 1l);
        assertThat(response.getHits().getAt(0).id(), equalTo("2"));

        logger.info("Running moreLikeThis on alias with node client");
        response = internalCluster().clientNodeClient().prepareSearch("beta").setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 1l);
        assertThat(response.getHits().getAt(0).id(), equalTo("3"));

    }

    @Test

@ -160,11 +156,11 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

        SearchResponse response = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("foo", "bar", "1"))).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1"))).get();
        assertNoFailures(response);
        assertThat(response, notNullValue());
        response = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("foo", "bar", "1"))).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1"))).get();
        assertNoFailures(response);
        assertThat(response, notNullValue());
    }

@ -186,7 +182,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        client().admin().indices().prepareRefresh("foo").execute().actionGet();

        SearchResponse response = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem((Item) new Item("foo", "bar", "1").routing("2"))).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1").routing("2"))).get();
        assertNoFailures(response);
        assertThat(response, notNullValue());
    }

@ -209,7 +205,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
                .execute().actionGet();
        client().admin().indices().prepareRefresh("foo").execute().actionGet();
        SearchResponse response = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem((Item) new Item("foo", "bar", "1").routing("4000"))).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1").routing("4000"))).get();
        assertNoFailures(response);
        assertThat(response, notNullValue());
    }

@ -237,12 +233,12 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        // Implicit list of fields -> ignore numeric fields
        SearchResponse searchResponse = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(searchResponse, 1l);

        // Explicit list of fields including numeric fields -> fail
        assertThrows(client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder("string_value", "int_value").addItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class);
                new MoreLikeThisQueryBuilder("string_value", "int_value").addLikeItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class);

        // mlt query with no field -> OK
        searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery().likeText("index").minTermFreq(1).minDocFreq(1)).execute().actionGet();

@ -299,16 +295,16 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        logger.info("Running More Like This with include true");
        SearchResponse response = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
        assertOrderedSearchHits(response, "1", "2");

        response = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "2")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "2")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
        assertOrderedSearchHits(response, "2", "1");

        logger.info("Running More Like This with include false");
        response = client().prepareSearch().setQuery(
                new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%")).get();
                new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%")).get();
        assertSearchHits(response, "2");
    }

@ -359,7 +355,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        logger.info("Running MoreLikeThis");
        MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").include(true).minTermFreq(1).minDocFreq(1)
                .addItem(new MoreLikeThisQueryBuilder.Item("test", "type0", "0"));
                .addLikeItem(new Item("test", "type0", "0"));

        String[] types = new String[numOfTypes];
        for (int i = 0; i < numOfTypes; i++) {

@ -389,8 +385,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        indexRandom(true, builders);

        int maxIters = randomIntBetween(10, 20);
        for (int i = 0; i < maxIters; i++)
        {
        for (int i = 0; i < maxIters; i++) {
            int max_query_terms = randomIntBetween(1, values.length);
            logger.info("Running More Like This with max_query_terms = %s", max_query_terms);
            MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery("text").ids("0").minTermFreq(1).minDocFreq(1)

@ -451,14 +446,14 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        logger.info("Indexing a single document ...");
        XContentBuilder doc = jsonBuilder().startObject();
        for (int i = 0; i < numFields; i++) {
            doc.field("field"+i, generateRandomStringArray(5, 10, false)+"a"); // make sure they are not all empty
            doc.field("field" + i, generateRandomStringArray(5, 10, false) + "a"); // make sure they are not all empty
        }
        doc.endObject();
        indexRandom(true, client().prepareIndex("test", "type1", "0").setSource(doc));

        logger.info("Checking the document matches ...");
        MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery()
                .like((Item) new Item().doc(doc).index("test").type("type1").routing("0")) // routing to ensure we hit the shard with the doc
                .like(new Item("test", "type1", doc).routing("0")) // routing to ensure we hit the shard with the doc
                .minTermFreq(0)
                .minDocFreq(0)
                .maxQueryTerms(100)
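Artificial documents now go through the same Item class, passing an XContentBuilder in place of an id, and get-style options such as routing still chain onto it. A condensed sketch of the pattern used in the test above (field values are illustrative):

    XContentBuilder doc = jsonBuilder().startObject().field("text", "lucene").endObject();
    MoreLikeThisQueryBuilder mlt = moreLikeThisQuery()
            .like(new Item("test", "type1", doc).routing("0")) // a doc instead of an id
            .minTermFreq(0)
            .minDocFreq(0);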
@ -479,18 +474,18 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        logger.info("Creating an index with a single document ...");
        indexRandom(true, client().prepareIndex("test", "type1", "1").setSource(jsonBuilder()
                .startObject()
                    .field("text", "Hello World!")
                    .field("date", "2009-01-01")
                .field("text", "Hello World!")
                .field("date", "2009-01-01")
                .endObject()));

        logger.info("Checking with a malformed field value ...");
        XContentBuilder malformedFieldDoc = jsonBuilder()
                .startObject()
                    .field("text", "Hello World!")
                    .field("date", "this is not a date!")
                .field("text", "Hello World!")
                .field("date", "this is not a date!")
                .endObject();
        MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery()
                .like((Item) new Item().doc(malformedFieldDoc).index("test").type("type1"))
                .like(new Item("test", "type1", malformedFieldDoc))
                .minTermFreq(0)
                .minDocFreq(0)
                .minimumShouldMatch("0%");

@ -502,7 +497,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        logger.info("Checking with an empty document ...");
        XContentBuilder emptyDoc = jsonBuilder().startObject().endObject();
        mltQuery = moreLikeThisQuery()
                .like((Item) new Item().doc(emptyDoc).index("test").type("type1"))
                .like(new Item("test", "type1", emptyDoc))
                .minTermFreq(0)
                .minDocFreq(0)
                .minimumShouldMatch("0%");

@ -514,7 +509,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        logger.info("Checking when document is malformed ...");
        XContentBuilder malformedDoc = jsonBuilder().startObject();
        mltQuery = moreLikeThisQuery()
                .like((Item) new Item().doc(malformedDoc).index("test").type("type1"))
                .like(new Item("test", "type1", malformedDoc))
                .minTermFreq(0)
                .minDocFreq(0)
                .minimumShouldMatch("0%");

@ -526,11 +521,11 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        logger.info("Checking the document matches otherwise ...");
        XContentBuilder normalDoc = jsonBuilder()
                .startObject()
                    .field("text", "Hello World!")
                    .field("date", "1000-01-01") // should be properly parsed but ignored ...
                .field("text", "Hello World!")
                .field("date", "1000-01-01") // should be properly parsed but ignored ...
                .endObject();
        mltQuery = moreLikeThisQuery()
                .like((Item) new Item().doc(normalDoc).index("test").type("type1"))
                .like(new Item("test", "type1", normalDoc))
                .minTermFreq(0)
                .minDocFreq(0)
                .minimumShouldMatch("100%"); // strict all terms must match but date is ignored

@ -541,7 +536,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
    }

    @Test
    public void testMoreLikeThisIgnoreLike() throws ExecutionException, InterruptedException, IOException {
    public void testMoreLikeThisUnlike() throws ExecutionException, InterruptedException, IOException {
        createIndex("test");
        ensureGreen();
        int numFields = randomIntBetween(5, 10);

@ -561,8 +556,8 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        indexRandom(true, builders);

        logger.info("First check the document matches all indexed docs.");
        MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery("field0")
                .like((Item) new Item().doc(doc).index("test").type("type1"))
        MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery()
                .like(new Item("test", "type1", doc))
                .minTermFreq(0)
                .minDocFreq(0)
                .maxQueryTerms(100)

@ -577,11 +572,12 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        for (int i = 0; i < numFields; i++) {
            docs.add(new Item("test", "type1", i+""));
            mltQuery = moreLikeThisQuery()
                    .like((Item) new Item().doc(doc).index("test").type("type1"))
                    .like(new Item("test", "type1", doc))
                    .ignoreLike(docs.toArray(Item.EMPTY_ARRAY))
                    .minTermFreq(0)
                    .minDocFreq(0)
                    .maxQueryTerms(100)
                    .include(true)
                    .minimumShouldMatch("0%");
            response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
            assertSearchResponse(response);

@ -628,5 +624,4 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        assertSearchResponse(response);
        assertHitCount(response, 1);
    }

}
@ -52,6 +52,7 @@ import org.elasticsearch.http.HttpServerTransport;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.GeoShapeQueryBuilder;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermsLookupQueryBuilder;
import org.elasticsearch.plugins.Plugin;

@ -229,7 +230,7 @@ public class ContextAndHeaderTransportIT extends ESIntegTestCase {
        transportClient().admin().indices().prepareRefresh(lookupIndex, queryIndex).get();

        MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = QueryBuilders.moreLikeThisQuery("name")
                .addItem(new MoreLikeThisQueryBuilder.Item(lookupIndex, "type", "1"))
                .addLikeItem(new Item(lookupIndex, "type", "1"))
                .minTermFreq(1)
                .minDocFreq(1);
@ -41,4 +41,12 @@ thought of as a delete operation followed by an index operation.
==== `indices.fielddata.cache.expire`

The experimental feature `indices.fielddata.cache.expire` has been removed.
For indices that have this setting configured, it will be ignored.

=== More Like This

The MoreLikeThisQueryBuilder#ignoreLike methods have been deprecated in favor
of the unlike methods.

MoreLikeThisBuilder#addItem has been deprecated in favor of
MoreLikeThisBuilder#addLikeItem.
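Both renames are mechanical in client code. A before/after sketch (index, type and id values are illustrative, and the unlike overload is assumed to mirror ignoreLike):

    // before (now deprecated)
    new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1"));
    moreLikeThisQuery().like(likeItem).ignoreLike(unlikedItems);

    // after
    new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1"));
    moreLikeThisQuery().like(likeItem).unlike(unlikedItems);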