diff --git a/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java b/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java index 46fe1992f28..cbc1b47733c 100644 --- a/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java +++ b/core/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java @@ -640,7 +640,7 @@ public class TermVectorsRequest extends SingleShardRequest i } } - private static Map readPerFieldAnalyzer(Map map) { + public static Map readPerFieldAnalyzer(Map map) { Map mapStrStr = new HashMap<>(); for (Map.Entry e : map.entrySet()) { if (e.getValue() instanceof String) { diff --git a/core/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java b/core/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java index 2084b675aea..cdfa9ad9991 100644 --- a/core/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java +++ b/core/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java @@ -253,12 +253,12 @@ public class MoreLikeThisQuery extends Query { setLikeText(likeText.toArray(Strings.EMPTY_ARRAY)); } - public void setUnlikeText(Fields... ignoreFields) { - this.unlikeFields = ignoreFields; + public void setUnlikeText(Fields... 
unlikeFields) { + this.unlikeFields = unlikeFields; } - public void setIgnoreText(List ignoreText) { - this.unlikeText = ignoreText.toArray(Strings.EMPTY_ARRAY); + public void setUnlikeText(List unlikeText) { + this.unlikeText = unlikeText.toArray(Strings.EMPTY_ARRAY); } public String[] getMoreLikeFields() { diff --git a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java index 19d65d91d27..4994070fd74 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryBuilder.java @@ -19,131 +19,357 @@ package org.elasticsearch.index.query; -import org.elasticsearch.action.get.MultiGetRequest; +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.termvectors.TermVectorsRequest; import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.xcontent.*; import org.elasticsearch.index.VersionType; -import org.elasticsearch.search.fetch.source.FetchSourceContext; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Locale; +import java.util.*; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; /** - * A more like this query that finds documents that are "like" the provided {@link #likeText(String)} - * which is checked against the fields the query is constructed with. + * A more like this query that finds documents that are "like" the provided set of document(s). + * + * The documents are provided as a set of strings and/or a list of {@link Item}. 
*/ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQueryBuilder { /** - * A single get item. Pure delegate to multi get. + * A single item to be used for a {@link MoreLikeThisQueryBuilder}. */ - public static final class Item extends MultiGetRequest.Item implements ToXContent { + public static final class Item implements ToXContent { public static final Item[] EMPTY_ARRAY = new Item[0]; + public interface Field { + ParseField INDEX = new ParseField("_index"); + ParseField TYPE = new ParseField("_type"); + ParseField ID = new ParseField("_id"); + ParseField DOC = new ParseField("doc"); + ParseField FIELDS = new ParseField("fields"); + ParseField PER_FIELD_ANALYZER = new ParseField("per_field_analyzer"); + ParseField ROUTING = new ParseField("_routing"); + ParseField VERSION = new ParseField("_version"); + ParseField VERSION_TYPE = new ParseField("_version_type"); + } + + private String index; + private String type; + private String id; private BytesReference doc; - private String likeText; + private String[] fields; + private Map perFieldAnalyzer; + private String routing; + private long version = Versions.MATCH_ANY; + private VersionType versionType = VersionType.INTERNAL; public Item() { - super(); + } + /** + * Constructor for a given item / document request + * + * @param index the index where the document is located + * @param type the type of the document + * @param id and its id + */ public Item(String index, @Nullable String type, String id) { - super(index, type, id); + this.index = index; + this.type = type; + this.id = id; } - public Item(String likeText) { - this.likeText = likeText; + /** + * Constructor for an artificial document request, that is not present in the index. 
+ * + * @param index the index to be used for parsing the doc + * @param type the type to be used for parsing the doc + * @param doc the document specification + */ + public Item(String index, String type, XContentBuilder doc) { + this.index = index; + this.type = type; + this.doc(doc); + } + + public String index() { + return index; + } + + public Item index(String index) { + this.index = index; + return this; + } + + public String type() { + return type; + } + + public Item type(String type) { + this.type = type; + return this; + } + + public String id() { + return id; + } + + public Item id(String id) { + this.id = id; + return this; } public BytesReference doc() { return doc; } - public Item doc(XContentBuilder doc) { - this.doc = doc.bytes(); + /** + * Sets to a given artificial document, that is a document that is not present in the index. + */ + public Item doc(BytesReference doc) { + this.doc = doc; return this; } + /** + * Sets to a given artificial document, that is a document that is not present in the index. + */ + public Item doc(XContentBuilder doc) { + return this.doc(doc.bytes()); + } + + public String[] fields() { + return fields; + } + + public Item fields(String... fields) { + this.fields = fields; + return this; + } + + public Map perFieldAnalyzer() { + return perFieldAnalyzer; + } + + /** + * Sets the analyzer(s) to use at any given field. 
+ */ + public Item perFieldAnalyzer(Map perFieldAnalyzer) { + this.perFieldAnalyzer = perFieldAnalyzer; + return this; + } + + public String routing() { + return routing; + } + + public Item routing(String routing) { + this.routing = routing; + return this; + } + + public long version() { + return version; + } + + public Item version(long version) { + this.version = version; + return this; + } + + public VersionType versionType() { + return versionType; + } + + public Item versionType(VersionType versionType) { + this.versionType = versionType; + return this; + } + + /** + * Convert this to a {@link TermVectorsRequest} for fetching the terms of the document. + */ + public TermVectorsRequest toTermVectorsRequest() { + TermVectorsRequest termVectorsRequest = new TermVectorsRequest(index, type, id) + .selectedFields(fields) + .routing(routing) + .version(version) + .versionType(versionType) + .perFieldAnalyzer(perFieldAnalyzer) + .positions(false) // ensures these following parameters are never set + .offsets(false) + .payloads(false) + .fieldStatistics(false) + .termStatistics(false) + .dfs(false); + // for artificial docs to make sure that the id has changed in the item too + if (doc != null) { + termVectorsRequest.doc(doc, true); + this.id(termVectorsRequest.id()); + } + return termVectorsRequest; + } + + /** + * Parses and returns the given item. 
+ */ + public static Item parse(XContentParser parser, ParseFieldMatcher parseFieldMatcher, Item item) throws IOException { + XContentParser.Token token; + String currentFieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (currentFieldName != null) { + if (parseFieldMatcher.match(currentFieldName, Field.INDEX)) { + item.index = parser.text(); + } else if (parseFieldMatcher.match(currentFieldName, Field.TYPE)) { + item.type = parser.text(); + } else if (parseFieldMatcher.match(currentFieldName, Field.ID)) { + item.id = parser.text(); + } else if (parseFieldMatcher.match(currentFieldName, Field.DOC)) { + item.doc(jsonBuilder().copyCurrentStructure(parser)); + } else if (parseFieldMatcher.match(currentFieldName, Field.FIELDS)) { + if (token == XContentParser.Token.START_ARRAY) { + List fields = new ArrayList<>(); + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { + fields.add(parser.text()); + } + item.fields(fields.toArray(new String[fields.size()])); + } else { + throw new ElasticsearchParseException( + "failed to parse More Like This item. 
field [fields] must be an array"); + } + } else if (parseFieldMatcher.match(currentFieldName, Field.PER_FIELD_ANALYZER)) { + item.perFieldAnalyzer(TermVectorsRequest.readPerFieldAnalyzer(parser.map())); + } else if ("_routing".equals(currentFieldName) || "routing".equals(currentFieldName)) { + item.routing = parser.text(); + } else if ("_version".equals(currentFieldName) || "version".equals(currentFieldName)) { + item.version = parser.longValue(); + } else if ("_version_type".equals(currentFieldName) || "_versionType".equals(currentFieldName) + || "version_type".equals(currentFieldName) || "versionType".equals(currentFieldName)) { + item.versionType = VersionType.fromString(parser.text()); + } else { + throw new ElasticsearchParseException( + "failed to parse More Like This item. unknown field [{}]", currentFieldName); + } + } + } + if (item.id != null && item.doc != null) { + throw new ElasticsearchParseException( + "failed to parse More Like This item. either [id] or [doc] can be specified, but not both!"); + } + return item; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - if (this.likeText != null) { - return builder.value(this.likeText); - } builder.startObject(); - if (this.index() != null) { - builder.field("_index", this.index()); + if (this.index != null) { + builder.field(Field.INDEX.getPreferredName(), this.index); } - if (this.type() != null) { - builder.field("_type", this.type()); + if (this.type != null) { + builder.field(Field.TYPE.getPreferredName(), this.type); } - if (this.id() != null) { - builder.field("_id", this.id()); + if (this.id != null && this.doc == null) { + builder.field(Field.ID.getPreferredName(), this.id); } - if (this.doc() != null) { - XContentType contentType = XContentFactory.xContentType(doc); + if (this.doc != null) { + XContentType contentType = XContentFactory.xContentType(this.doc); if (contentType == builder.contentType()) { - builder.rawField("doc", doc); + 
builder.rawField(Field.DOC.getPreferredName(), this.doc); } else { - XContentParser parser = XContentFactory.xContent(contentType).createParser(doc); + XContentParser parser = XContentFactory.xContent(contentType).createParser(this.doc); parser.nextToken(); - builder.field("doc"); + builder.field(Field.DOC.getPreferredName()); builder.copyCurrentStructure(parser); } } - if (this.fields() != null) { - builder.array("fields", this.fields()); + if (this.fields != null) { + builder.array(Field.FIELDS.getPreferredName(), this.fields); } - if (this.routing() != null) { - builder.field("_routing", this.routing()); + if (this.perFieldAnalyzer != null) { + builder.field(Field.PER_FIELD_ANALYZER.getPreferredName(), this.perFieldAnalyzer); } - if (this.fetchSourceContext() != null) { - FetchSourceContext source = this.fetchSourceContext(); - String[] includes = source.includes(); - String[] excludes = source.excludes(); - if (includes.length == 0 && excludes.length == 0) { - builder.field("_source", source.fetchSource()); - } else if (includes.length > 0 && excludes.length == 0) { - builder.array("_source", source.includes()); - } else if (excludes.length > 0) { - builder.startObject("_source"); - if (includes.length > 0) { - builder.array("includes", source.includes()); - } - builder.array("excludes", source.excludes()); - builder.endObject(); - } + if (this.routing != null) { + builder.field(Field.ROUTING.getPreferredName(), this.routing); } - if (this.version() != Versions.MATCH_ANY) { - builder.field("_version", this.version()); + if (this.version != Versions.MATCH_ANY) { + builder.field(Field.VERSION.getPreferredName(), this.version); } - if (this.versionType() != VersionType.INTERNAL) { - builder.field("_version_type", this.versionType().toString().toLowerCase(Locale.ROOT)); + if (this.versionType != VersionType.INTERNAL) { + builder.field(Field.VERSION_TYPE.getPreferredName(), this.versionType.toString().toLowerCase(Locale.ROOT)); } return builder.endObject(); } + + 
@Override + public final String toString() { + try { + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.prettyPrint(); + toXContent(builder, EMPTY_PARAMS); + return builder.string(); + } catch (Exception e) { + return "{ \"error\" : \"" + ExceptionsHelper.detailedMessage(e) + "\"}"; + } + } + + @Override + public int hashCode() { + return Objects.hash(index, type, id, doc, Arrays.hashCode(fields), perFieldAnalyzer, routing, + version, versionType); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof Item)) return false; + Item other = (Item) o; + return Objects.equals(index, other.index) && + Objects.equals(type, other.type) && + Objects.equals(id, other.id) && + Objects.equals(doc, other.doc) && + Arrays.equals(fields, other.fields) && // otherwise we are comparing pointers + Objects.equals(perFieldAnalyzer, other.perFieldAnalyzer) && + Objects.equals(routing, other.routing) && + Objects.equals(version, other.version) && + Objects.equals(versionType, other.versionType); + } } + // document inputs + private List likeTexts = new ArrayList<>(); + private List unlikeTexts = new ArrayList<>(); + private List likeItems = new ArrayList<>(); + private List unlikeItems = new ArrayList<>(); private final String[] fields; - private List docs = new ArrayList<>(); - private List unlikeDocs = new ArrayList<>(); - private Boolean include = null; - private String minimumShouldMatch = null; - private int minTermFreq = -1; + + // term selection parameters private int maxQueryTerms = -1; - private String[] stopWords = null; + private int minTermFreq = -1; private int minDocFreq = -1; private int maxDocFreq = -1; private int minWordLength = -1; private int maxWordLength = -1; - private float boostTerms = -1; - private float boost = -1; + private String[] stopWords = null; private String analyzer; + + // query formation parameters + private String minimumShouldMatch = null; + private float boostTerms = -1; + 
private Boolean include = null; + + // other parameters private Boolean failOnUnsupportedField; + private float boost = -1; private String queryName; /** @@ -162,108 +388,71 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ this.fields = fields; } - /** - * Sets the documents to use in order to find documents that are "like" this. - * - * @param docs the documents to use when generating the 'More Like This' query. - */ - public MoreLikeThisQueryBuilder like(Item... docs) { - this.docs = Arrays.asList(docs); - return this; - } - /** * Sets the text to use in order to find documents that are "like" this. * - * @param likeText the text to use when generating the 'More Like This' query. + * @param likeTexts the text to use when generating the 'More Like This' query. */ - public MoreLikeThisQueryBuilder like(String... likeText) { - this.docs = new ArrayList<>(); - for (String text : likeText) { - this.docs.add(new Item(text)); - } - return this; + public MoreLikeThisQueryBuilder like(String... likeTexts) { + this.likeTexts = new ArrayList<>(); + return addLikeText(likeTexts); } /** - * Sets the documents from which the terms should not be selected from. + * Sets the documents to use in order to find documents that are "like" this. + * + * @param likeItems the documents to use when generating the 'More Like This' query. */ - public MoreLikeThisQueryBuilder ignoreLike(Item... docs) { - this.unlikeDocs = Arrays.asList(docs); - return this; + public MoreLikeThisQueryBuilder like(Item... likeItems) { + this.likeItems = new ArrayList<>(); + return addLikeItem(likeItems); } /** - * Sets the text from which the terms should not be selected from. + * Adds some text to use in order to find documents that are "like" this. */ - public MoreLikeThisQueryBuilder ignoreLike(String... 
likeText) { - this.unlikeDocs = new ArrayList<>(); - for (String text : likeText) { - this.unlikeDocs.add(new Item(text)); - } + public MoreLikeThisQueryBuilder addLikeText(String... likeTexts) { + Collections.addAll(this.likeTexts, likeTexts); return this; } /** * Adds a document to use in order to find documents that are "like" this. */ - public MoreLikeThisQueryBuilder addItem(Item item) { - this.docs.add(item); + public MoreLikeThisQueryBuilder addLikeItem(Item... likeItems) { + Collections.addAll(this.likeItems, likeItems); return this; } /** - * Adds some text to use in order to find documents that are "like" this. + * Sets the text from which the terms should not be selected from. */ - public MoreLikeThisQueryBuilder addLikeText(String likeText) { - this.docs.add(new Item(likeText)); + public MoreLikeThisQueryBuilder unlike(String... unlikeTexts) { + this.unlikeTexts = new ArrayList<>(); + return addUnlikeText(unlikeTexts); + } + + /** + * Sets the documents from which the terms should not be selected from. + */ + public MoreLikeThisQueryBuilder unlike(Item... unlikeItems) { + this.unlikeItems = new ArrayList<>(); + return addUnlikeItem(unlikeItems); + } + + /** + * Adds some text to use in order to find documents that are "unlike" this. + */ + public MoreLikeThisQueryBuilder addUnlikeText(String... unlikeTexts) { + Collections.addAll(this.unlikeTexts, unlikeTexts); return this; } /** - * The text to use in order to find documents that are "like" this. + * Adds a document to use in order to find documents that are "unlike" this. */ - @Deprecated - public MoreLikeThisQueryBuilder likeText(String likeText) { - return like(likeText); - } - - @Deprecated - public MoreLikeThisQueryBuilder ids(String... ids) { - Item[] items = new Item[ids.length]; - for (int i = 0; i < items.length; i++) { - items[i] = new Item(null, null, ids[i]); - } - return like(items); - } - - @Deprecated - public MoreLikeThisQueryBuilder docs(Item... 
docs) { - return like(docs); - } - - public MoreLikeThisQueryBuilder include(boolean include) { - this.include = include; - return this; - } - - /** - * Number of terms that must match the generated query expressed in the - * common syntax for minimum should match. Defaults to 30%. - * - * @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String) - */ - public MoreLikeThisQueryBuilder minimumShouldMatch(String minimumShouldMatch) { - this.minimumShouldMatch = minimumShouldMatch; - return this; - } - - /** - * The frequency below which terms will be ignored in the source doc. The default - * frequency is 2. - */ - public MoreLikeThisQueryBuilder minTermFreq(int minTermFreq) { - this.minTermFreq = minTermFreq; + public MoreLikeThisQueryBuilder addUnlikeItem(Item... unlikeItems) { + Collections.addAll(this.unlikeItems, unlikeItems); return this; } @@ -277,14 +466,11 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ } /** - * Set the set of stopwords. - *

- *

Any word in this set is considered "uninteresting" and ignored. Even if your Analyzer allows stopwords, you - * might want to tell the MoreLikeThis code to ignore them, as for the purposes of document similarity it seems - * reasonable to assume that "a stop word is never interesting". + * The frequency below which terms will be ignored in the source doc. The default + * frequency is 2. */ - public MoreLikeThisQueryBuilder stopWords(String... stopWords) { - this.stopWords = stopWords; + public MoreLikeThisQueryBuilder minTermFreq(int minTermFreq) { + this.minTermFreq = minTermFreq; return this; } @@ -325,10 +511,14 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ } /** - * Sets the boost factor to use when boosting terms. Defaults to 1. + * Set the set of stopwords. + *

+ *

Any word in this set is considered "uninteresting" and ignored. Even if your Analyzer allows stopwords, you + * might want to tell the MoreLikeThis code to ignore them, as for the purposes of document similarity it seems + * reasonable to assume that "a stop word is never interesting". */ - public MoreLikeThisQueryBuilder boostTerms(float boostTerms) { - this.boostTerms = boostTerms; + public MoreLikeThisQueryBuilder stopWords(String... stopWords) { + this.stopWords = stopWords; return this; } @@ -340,9 +530,30 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ return this; } - @Override - public MoreLikeThisQueryBuilder boost(float boost) { - this.boost = boost; + /** + * Number of terms that must match the generated query expressed in the + * common syntax for minimum should match. Defaults to 30%. + * + * @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String) + */ + public MoreLikeThisQueryBuilder minimumShouldMatch(String minimumShouldMatch) { + this.minimumShouldMatch = minimumShouldMatch; + return this; + } + + /** + * Sets the boost factor to use when boosting terms. Defaults to 1. + */ + public MoreLikeThisQueryBuilder boostTerms(float boostTerms) { + this.boostTerms = boostTerms; + return this; + } + + /** + * Whether to include the input documents. Defaults to false + */ + public MoreLikeThisQueryBuilder include(boolean include) { + this.include = include; return this; } @@ -354,6 +565,12 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ return this; } + @Override + public MoreLikeThisQueryBuilder boost(float boost) { + this.boost = boost; + return this; + } + /** * Sets the query name for the filter that can be used when searching for matched_filters per hit. */ @@ -362,71 +579,123 @@ public class MoreLikeThisQueryBuilder extends QueryBuilder implements BoostableQ return this; } + /** + * The text to use in order to find documents that are "like" this. 
+ */ + @Deprecated + public MoreLikeThisQueryBuilder likeText(String likeText) { + return like(likeText); + } + + @Deprecated + public MoreLikeThisQueryBuilder ids(String... ids) { + Item[] items = new Item[ids.length]; + for (int i = 0; i < items.length; i++) { + items[i] = new Item(null, null, ids[i]); + } + return like(items); + } + + @Deprecated + public MoreLikeThisQueryBuilder docs(Item... docs) { + return like(docs); + } + + /** + * Sets the documents from which the terms should not be selected from. + * + * @Deprecated Use {@link #unlike(Item...)} instead + */ + @Deprecated + public MoreLikeThisQueryBuilder ignoreLike(Item... docs) { + return unlike(docs); + } + + /** + * Sets the text from which the terms should not be selected from. + * + * @Deprecated Use {@link #unlike(String...)} instead. + */ + @Deprecated + public MoreLikeThisQueryBuilder ignoreLike(String... likeText) { + return unlike(likeText); + } + + /** + * Adds a document to use in order to find documents that are "like" this. + */ + @Deprecated + public MoreLikeThisQueryBuilder addItem(Item... 
likeItems) { + return addLikeItem(likeItems); + } + @Override protected void doXContent(XContentBuilder builder, Params params) throws IOException { - String likeFieldName = MoreLikeThisQueryParser.Fields.LIKE.getPreferredName(); builder.startObject(MoreLikeThisQueryParser.NAME); if (fields != null) { - builder.startArray("fields"); - for (String field : fields) { - builder.value(field); - } - builder.endArray(); + builder.field(MoreLikeThisQueryParser.Field.FIELDS.getPreferredName(), fields); } - if (this.docs.isEmpty()) { - throw new IllegalArgumentException("more_like_this requires '" + likeFieldName + "' to be provided"); + if (this.likeTexts.isEmpty() && this.likeItems.isEmpty()) { + throw new IllegalArgumentException("more_like_this requires '" + MoreLikeThisQueryParser.Field.LIKE.getPreferredName() + "' to be provided"); } else { - builder.field(likeFieldName, docs); + buildLikeField(builder, MoreLikeThisQueryParser.Field.LIKE.getPreferredName(), likeTexts, likeItems); } - if (!unlikeDocs.isEmpty()) { - builder.field(MoreLikeThisQueryParser.Fields.UNLIKE.getPreferredName(), unlikeDocs); - } - if (minimumShouldMatch != null) { - builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch); - } - if (minTermFreq != -1) { - builder.field(MoreLikeThisQueryParser.Fields.MIN_TERM_FREQ.getPreferredName(), minTermFreq); + if (!unlikeTexts.isEmpty() || !unlikeItems.isEmpty()) { + buildLikeField(builder, MoreLikeThisQueryParser.Field.UNLIKE.getPreferredName(), unlikeTexts, unlikeItems); } if (maxQueryTerms != -1) { - builder.field(MoreLikeThisQueryParser.Fields.MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms); + builder.field(MoreLikeThisQueryParser.Field.MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms); } - if (stopWords != null && stopWords.length > 0) { - builder.startArray(MoreLikeThisQueryParser.Fields.STOP_WORDS.getPreferredName()); - for (String stopWord : stopWords) { - builder.value(stopWord); - } - 
builder.endArray(); + if (minTermFreq != -1) { + builder.field(MoreLikeThisQueryParser.Field.MIN_TERM_FREQ.getPreferredName(), minTermFreq); } if (minDocFreq != -1) { - builder.field(MoreLikeThisQueryParser.Fields.MIN_DOC_FREQ.getPreferredName(), minDocFreq); + builder.field(MoreLikeThisQueryParser.Field.MIN_DOC_FREQ.getPreferredName(), minDocFreq); } if (maxDocFreq != -1) { - builder.field(MoreLikeThisQueryParser.Fields.MAX_DOC_FREQ.getPreferredName(), maxDocFreq); + builder.field(MoreLikeThisQueryParser.Field.MAX_DOC_FREQ.getPreferredName(), maxDocFreq); } if (minWordLength != -1) { - builder.field(MoreLikeThisQueryParser.Fields.MIN_WORD_LENGTH.getPreferredName(), minWordLength); + builder.field(MoreLikeThisQueryParser.Field.MIN_WORD_LENGTH.getPreferredName(), minWordLength); } if (maxWordLength != -1) { - builder.field(MoreLikeThisQueryParser.Fields.MAX_WORD_LENGTH.getPreferredName(), maxWordLength); + builder.field(MoreLikeThisQueryParser.Field.MAX_WORD_LENGTH.getPreferredName(), maxWordLength); + } + if (stopWords != null && stopWords.length > 0) { + builder.field(MoreLikeThisQueryParser.Field.STOP_WORDS.getPreferredName(), stopWords); + } + if (analyzer != null) { + builder.field(MoreLikeThisQueryParser.Field.ANALYZER.getPreferredName(), analyzer); + } + if (minimumShouldMatch != null) { + builder.field(MoreLikeThisQueryParser.Field.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch); } if (boostTerms != -1) { - builder.field(MoreLikeThisQueryParser.Fields.BOOST_TERMS.getPreferredName(), boostTerms); + builder.field(MoreLikeThisQueryParser.Field.BOOST_TERMS.getPreferredName(), boostTerms); + } + if (include != null) { + builder.field(MoreLikeThisQueryParser.Field.INCLUDE.getPreferredName(), include); + } + if (failOnUnsupportedField != null) { + builder.field(MoreLikeThisQueryParser.Field.FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField); } if (boost != -1) { builder.field("boost", boost); } - if (analyzer != null) { - 
builder.field("analyzer", analyzer); - } - if (failOnUnsupportedField != null) { - builder.field(MoreLikeThisQueryParser.Fields.FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField); - } if (queryName != null) { builder.field("_name", queryName); } - if (include != null) { - builder.field("include", include); - } builder.endObject(); } + + private static void buildLikeField(XContentBuilder builder, String fieldName, List texts, List items) throws IOException { + builder.startArray(fieldName); + for (String text : texts) { + builder.value(text); + } + for (Item item : items) { + builder.value(item); + } + builder.endArray(); + } } diff --git a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java index 4397091fe65..ff390315981 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java +++ b/core/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java @@ -26,9 +26,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.action.termvectors.MultiTermVectorsRequest; import org.elasticsearch.action.termvectors.MultiTermVectorsResponse; -import org.elasticsearch.action.termvectors.TermVectorsRequest; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.Strings; @@ -38,44 +36,44 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.Analysis; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.internal.UidFieldMapper; +import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item; import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService; import org.elasticsearch.search.internal.SearchContext; 
import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; +import java.util.*; import static org.elasticsearch.index.mapper.Uid.createUidAsBytes; /** + * Parser for the The More Like This Query (MLT Query) which finds documents that are "like" a given set of documents. * + * The documents are provided as a set of strings and/or a list of {@link Item}. */ public class MoreLikeThisQueryParser implements QueryParser { public static final String NAME = "mlt"; private MoreLikeThisFetchService fetchService = null; - public static class Fields { - public static final ParseField LIKE_TEXT = new ParseField("like_text").withAllDeprecated("like"); - public static final ParseField MIN_TERM_FREQ = new ParseField("min_term_freq"); - public static final ParseField MAX_QUERY_TERMS = new ParseField("max_query_terms"); - public static final ParseField MIN_WORD_LENGTH = new ParseField("min_word_length", "min_word_len"); - public static final ParseField MAX_WORD_LENGTH = new ParseField("max_word_length", "max_word_len"); - public static final ParseField MIN_DOC_FREQ = new ParseField("min_doc_freq"); - public static final ParseField MAX_DOC_FREQ = new ParseField("max_doc_freq"); - public static final ParseField BOOST_TERMS = new ParseField("boost_terms"); - public static final ParseField MINIMUM_SHOULD_MATCH = new ParseField("minimum_should_match"); - public static final ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field"); - public static final ParseField STOP_WORDS = new ParseField("stop_words"); - public static final ParseField DOCUMENT_IDS = new ParseField("ids").withAllDeprecated("like"); - public static final ParseField DOCUMENTS = new ParseField("docs").withAllDeprecated("like"); - public static final ParseField LIKE = new ParseField("like"); - public static final ParseField UNLIKE = new ParseField("unlike"); - 
public static final ParseField INCLUDE = new ParseField("include"); + public interface Field { + ParseField FIELDS = new ParseField("fields"); + ParseField LIKE = new ParseField("like"); + ParseField UNLIKE = new ParseField("unlike"); + ParseField LIKE_TEXT = new ParseField("like_text").withAllDeprecated("like"); + ParseField IDS = new ParseField("ids").withAllDeprecated("like"); + ParseField DOCS = new ParseField("docs").withAllDeprecated("like"); + ParseField MAX_QUERY_TERMS = new ParseField("max_query_terms"); + ParseField MIN_TERM_FREQ = new ParseField("min_term_freq"); + ParseField MIN_DOC_FREQ = new ParseField("min_doc_freq"); + ParseField MAX_DOC_FREQ = new ParseField("max_doc_freq"); + ParseField MIN_WORD_LENGTH = new ParseField("min_word_length", "min_word_len"); + ParseField MAX_WORD_LENGTH = new ParseField("max_word_length", "max_word_len"); + ParseField STOP_WORDS = new ParseField("stop_words"); + ParseField ANALYZER = new ParseField("analyzer"); + ParseField MINIMUM_SHOULD_MATCH = new ParseField("minimum_should_match"); + ParseField BOOST_TERMS = new ParseField("boost_terms"); + ParseField INCLUDE = new ParseField("include"); + ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field"); } public MoreLikeThisQueryParser() { @@ -98,109 +96,108 @@ public class MoreLikeThisQueryParser implements QueryParser { MoreLikeThisQuery mltQuery = new MoreLikeThisQuery(); mltQuery.setSimilarity(parseContext.searchSimilarity()); - Analyzer analyzer = null; + + List likeTexts = new ArrayList<>(); + List unlikeTexts = new ArrayList<>(); + List likeItems = new ArrayList<>(); + List unlikeItems = new ArrayList<>(); + List moreLikeFields = null; + Analyzer analyzer = null; + boolean include = false; + boolean failOnUnsupportedField = true; String queryName = null; - boolean include = false; XContentParser.Token token; String currentFieldName = null; - - List likeTexts = new ArrayList<>(); - MultiTermVectorsRequest likeItems = new 
MultiTermVectorsRequest(); - - List unlikeTexts = new ArrayList<>(); - MultiTermVectorsRequest unlikeItems = new MultiTermVectorsRequest(); - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token.isValue()) { - if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE_TEXT)) { + if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE)) { + parseLikeField(parseContext, likeTexts, likeItems); + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.UNLIKE)) { + parseLikeField(parseContext, unlikeTexts, unlikeItems); + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE_TEXT)) { likeTexts.add(parser.text()); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE)) { - parseLikeField(parser, likeTexts, likeItems); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.UNLIKE)) { - parseLikeField(parser, unlikeTexts, unlikeItems); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MIN_TERM_FREQ)) { - mltQuery.setMinTermFrequency(parser.intValue()); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MAX_QUERY_TERMS)) { + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_QUERY_TERMS)) { mltQuery.setMaxQueryTerms(parser.intValue()); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MIN_DOC_FREQ)) { + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_TERM_FREQ)) { + mltQuery.setMinTermFrequency(parser.intValue()); + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_DOC_FREQ)) { mltQuery.setMinDocFreq(parser.intValue()); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MAX_DOC_FREQ)) { + } else if 
(parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_DOC_FREQ)) { mltQuery.setMaxDocFreq(parser.intValue()); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MIN_WORD_LENGTH)) { + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_WORD_LENGTH)) { mltQuery.setMinWordLen(parser.intValue()); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MAX_WORD_LENGTH)) { + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_WORD_LENGTH)) { mltQuery.setMaxWordLen(parser.intValue()); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.BOOST_TERMS)) { + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.ANALYZER)) { + analyzer = parseContext.analysisService().analyzer(parser.text()); + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MINIMUM_SHOULD_MATCH)) { + mltQuery.setMinimumShouldMatch(parser.text()); + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.BOOST_TERMS)) { float boostFactor = parser.floatValue(); if (boostFactor != 0) { mltQuery.setBoostTerms(true); mltQuery.setBoostTermsFactor(boostFactor); } - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.MINIMUM_SHOULD_MATCH)) { - mltQuery.setMinimumShouldMatch(parser.text()); - } else if ("analyzer".equals(currentFieldName)) { - analyzer = parseContext.analysisService().analyzer(parser.text()); + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.INCLUDE)) { + include = parser.booleanValue(); + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.FAIL_ON_UNSUPPORTED_FIELD)) { + failOnUnsupportedField = parser.booleanValue(); } else if ("boost".equals(currentFieldName)) { mltQuery.setBoost(parser.floatValue()); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.FAIL_ON_UNSUPPORTED_FIELD)) { - failOnUnsupportedField 
= parser.booleanValue(); } else if ("_name".equals(currentFieldName)) { queryName = parser.text(); - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.INCLUDE)) { - include = parser.booleanValue(); } else { throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]"); } } else if (token == XContentParser.Token.START_ARRAY) { - if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.STOP_WORDS)) { - Set stopWords = Sets.newHashSet(); - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - stopWords.add(parser.text()); - } - mltQuery.setStopWords(stopWords); - } else if ("fields".equals(currentFieldName)) { + if (parseContext.parseFieldMatcher().match(currentFieldName, Field.FIELDS)) { moreLikeFields = new LinkedList<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { String field = parser.text(); MappedFieldType fieldType = parseContext.fieldMapper(field); moreLikeFields.add(fieldType == null ? 
field : fieldType.names().indexName()); } - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.DOCUMENT_IDS)) { + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + parseLikeField(parseContext, likeTexts, likeItems); + } + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.UNLIKE)) { + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + parseLikeField(parseContext, unlikeTexts, unlikeItems); + } + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.IDS)) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (!token.isValue()) { throw new IllegalArgumentException("ids array element should only contain ids"); } - likeItems.add(newTermVectorsRequest().id(parser.text())); + likeItems.add(new Item(null, null, parser.text())); } - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.DOCUMENTS)) { + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.DOCS)) { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token != XContentParser.Token.START_OBJECT) { throw new IllegalArgumentException("docs array element should include an object"); } - likeItems.add(parseDocument(parser)); + likeItems.add(Item.parse(parser, parseContext.parseFieldMatcher(), new Item())); } - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE)) { + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.STOP_WORDS)) { + Set stopWords = Sets.newHashSet(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - parseLikeField(parser, likeTexts, likeItems); - } - } else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.UNLIKE)) { - while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - parseLikeField(parser, 
unlikeTexts, unlikeItems); + stopWords.add(parser.text()); } + mltQuery.setStopWords(stopWords); } else { throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]"); } } else if (token == XContentParser.Token.START_OBJECT) { - if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.LIKE)) { - parseLikeField(parser, likeTexts, likeItems); - } - else if (parseContext.parseFieldMatcher().match(currentFieldName, Fields.UNLIKE)) { - parseLikeField(parser, unlikeTexts, unlikeItems); + if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE)) { + parseLikeField(parseContext, likeTexts, likeItems); + } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.UNLIKE)) { + parseLikeField(parseContext, unlikeTexts, unlikeItems); } else { throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]"); } @@ -225,6 +222,7 @@ public class MoreLikeThisQueryParser implements QueryParser { if (useDefaultField) { moreLikeFields = Collections.singletonList(parseContext.defaultField()); } + // possibly remove unsupported fields removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField); if (moreLikeFields.isEmpty()) { @@ -242,93 +240,29 @@ public class MoreLikeThisQueryParser implements QueryParser { mltQuery.setLikeText(likeTexts); } if (!unlikeTexts.isEmpty()) { - mltQuery.setIgnoreText(unlikeTexts); + mltQuery.setUnlikeText(unlikeTexts); } // handle items if (!likeItems.isEmpty()) { - // set default index, type and fields if not specified - MultiTermVectorsRequest items = likeItems; - for (TermVectorsRequest item : unlikeItems) { - items.add(item); - } - - for (TermVectorsRequest item : items) { - if (item.index() == null) { - item.index(parseContext.index().name()); - } - if (item.type() == null) { - if (parseContext.queryTypes().size() > 1) { - throw new QueryParsingException(parseContext, - "ambiguous type for item with id: " + 
item.id() - + " and index: " + item.index()); - } else { - item.type(parseContext.queryTypes().iterator().next()); - } - } - // default fields if not present but don't override for artificial docs - if (item.selectedFields() == null && item.doc() == null) { - if (useDefaultField) { - item.selectedFields("*"); - } else { - item.selectedFields(moreLikeFields.toArray(new String[moreLikeFields.size()])); - } - } - } - // fetching the items with multi-termvectors API - items.copyContextAndHeadersFrom(SearchContext.current()); - MultiTermVectorsResponse responses = fetchService.fetchResponse(items); - - // getting the Fields for liked items - mltQuery.setLikeText(MoreLikeThisFetchService.getFields(responses, likeItems)); - - // getting the Fields for ignored items - if (!unlikeItems.isEmpty()) { - org.apache.lucene.index.Fields[] ignoreFields = MoreLikeThisFetchService.getFields(responses, unlikeItems); - if (ignoreFields.length > 0) { - mltQuery.setUnlikeText(ignoreFields); - } - } - - BooleanQuery.Builder boolQuery = new BooleanQuery.Builder(); - boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD); - - // exclude the items from the search - if (!include) { - handleExclude(boolQuery, likeItems); - } - return boolQuery.build(); + return handleItems(parseContext, mltQuery, likeItems, unlikeItems, include, moreLikeFields, useDefaultField); + } else { + return mltQuery; } - - return mltQuery; } - private TermVectorsRequest parseDocument(XContentParser parser) throws IOException { - TermVectorsRequest termVectorsRequest = newTermVectorsRequest(); - TermVectorsRequest.parseRequest(termVectorsRequest, parser); - return termVectorsRequest; - } - - private void parseLikeField(XContentParser parser, List likeTexts, MultiTermVectorsRequest items) throws IOException { + private static void parseLikeField(QueryParseContext parseContext, List texts, List items) throws IOException { + XContentParser parser = parseContext.parser(); if (parser.currentToken().isValue()) { - 
likeTexts.add(parser.text()); + texts.add(parser.text()); } else if (parser.currentToken() == XContentParser.Token.START_OBJECT) { - items.add(parseDocument(parser)); + items.add(Item.parse(parser, parseContext.parseFieldMatcher(), new Item())); } else { throw new IllegalArgumentException("Content of 'like' parameter should either be a string or an object"); } } - private TermVectorsRequest newTermVectorsRequest() { - return new TermVectorsRequest() - .positions(false) - .offsets(false) - .payloads(false) - .fieldStatistics(false) - .termStatistics(false); - } - - private List removeUnsupportedFields(List moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException { + private static List removeUnsupportedFields(List moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException { for (Iterator it = moreLikeFields.iterator(); it.hasNext(); ) { final String fieldName = it.next(); if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) { @@ -342,10 +276,67 @@ public class MoreLikeThisQueryParser implements QueryParser { return moreLikeFields; } - private void handleExclude(BooleanQuery.Builder boolQuery, MultiTermVectorsRequest likeItems) { + private Query handleItems(QueryParseContext parseContext, MoreLikeThisQuery mltQuery, List likeItems, List unlikeItems, + boolean include, List moreLikeFields, boolean useDefaultField) throws IOException { + // set default index, type and fields if not specified + for (Item item : likeItems) { + setDefaultIndexTypeFields(parseContext, item, moreLikeFields, useDefaultField); + } + for (Item item : unlikeItems) { + setDefaultIndexTypeFields(parseContext, item, moreLikeFields, useDefaultField); + } + + // fetching the items with multi-termvectors API + MultiTermVectorsResponse responses = fetchService.fetchResponse(likeItems, unlikeItems, SearchContext.current()); + + // getting the Fields for liked items + 
mltQuery.setLikeText(MoreLikeThisFetchService.getFieldsFor(responses, likeItems)); + + // getting the Fields for unliked items + if (!unlikeItems.isEmpty()) { + org.apache.lucene.index.Fields[] unlikeFields = MoreLikeThisFetchService.getFieldsFor(responses, unlikeItems); + if (unlikeFields.length > 0) { + mltQuery.setUnlikeText(unlikeFields); + } + } + + BooleanQuery boolQuery = new BooleanQuery(); + boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD); + + // exclude the items from the search + if (!include) { + handleExclude(boolQuery, likeItems); + } + return boolQuery; + } + + private static void setDefaultIndexTypeFields(QueryParseContext parseContext, Item item, List moreLikeFields, + boolean useDefaultField) { + if (item.index() == null) { + item.index(parseContext.index().name()); + } + if (item.type() == null) { + if (parseContext.queryTypes().size() > 1) { + throw new QueryParsingException(parseContext, + "ambiguous type for item with id: " + item.id() + " and index: " + item.index()); + } else { + item.type(parseContext.queryTypes().iterator().next()); + } + } + // default fields if not present but don't override for artificial docs + if ((item.fields() == null || item.fields().length == 0) && item.doc() == null) { + if (useDefaultField) { + item.fields("*"); + } else { + item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()])); + } + } + } + + private static void handleExclude(BooleanQuery boolQuery, List likeItems) { // artificial docs get assigned a random id and should be disregarded List uids = new ArrayList<>(); - for (TermVectorsRequest item : likeItems) { + for (Item item : likeItems) { if (item.doc() != null) { continue; } diff --git a/core/src/main/java/org/elasticsearch/index/search/morelikethis/MoreLikeThisFetchService.java b/core/src/main/java/org/elasticsearch/index/search/morelikethis/MoreLikeThisFetchService.java index 1c64d2b0fab..49643aaafa6 100644 --- 
a/core/src/main/java/org/elasticsearch/index/search/morelikethis/MoreLikeThisFetchService.java +++ b/core/src/main/java/org/elasticsearch/index/search/morelikethis/MoreLikeThisFetchService.java @@ -20,12 +20,17 @@ package org.elasticsearch.index.search.morelikethis; import org.apache.lucene.index.Fields; -import org.elasticsearch.action.termvectors.*; +import org.elasticsearch.action.termvectors.MultiTermVectorsItemResponse; +import org.elasticsearch.action.termvectors.MultiTermVectorsRequest; +import org.elasticsearch.action.termvectors.MultiTermVectorsResponse; +import org.elasticsearch.action.termvectors.TermVectorsResponse; import org.elasticsearch.client.Client; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item; +import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.ArrayList; @@ -46,24 +51,35 @@ public class MoreLikeThisFetchService extends AbstractComponent { this.client = client; } - public Fields[] fetch(MultiTermVectorsRequest requests) throws IOException { - return getFields(fetchResponse(requests), requests); + public Fields[] fetch(List items) throws IOException { + return getFieldsFor(fetchResponse(items, null, SearchContext.current()), items); } - public MultiTermVectorsResponse fetchResponse(MultiTermVectorsRequest requests) throws IOException { - return client.multiTermVectors(requests).actionGet(); + public MultiTermVectorsResponse fetchResponse(List likeItems, @Nullable List unlikeItems, + SearchContext searchContext) throws IOException { + MultiTermVectorsRequest request = new MultiTermVectorsRequest(); + for (Item item : likeItems) { + request.add(item.toTermVectorsRequest()); + } + if (unlikeItems != null) { + for (Item item : unlikeItems) { + 
request.add(item.toTermVectorsRequest()); + } + } + request.copyContextAndHeadersFrom(searchContext); + return client.multiTermVectors(request).actionGet(); } - public static Fields[] getFields(MultiTermVectorsResponse responses, MultiTermVectorsRequest requests) throws IOException { + public static Fields[] getFieldsFor(MultiTermVectorsResponse responses, List items) throws IOException { List likeFields = new ArrayList<>(); - Set items = new HashSet<>(); - for (TermVectorsRequest request : requests) { - items.add(new Item(request.index(), request.type(), request.id())); + Set selectedItems = new HashSet<>(); + for (Item request : items) { + selectedItems.add(new Item(request.index(), request.type(), request.id())); } for (MultiTermVectorsItemResponse response : responses) { - if (!hasResponseFromRequest(response, items)) { + if (!hasResponseFromRequest(response, selectedItems)) { continue; } if (response.isFailed()) { @@ -78,7 +94,7 @@ public class MoreLikeThisFetchService extends AbstractComponent { return likeFields.toArray(Fields.EMPTY_ARRAY); } - private static boolean hasResponseFromRequest(MultiTermVectorsItemResponse response, Set items) { - return items.contains(new Item(response.getIndex(), response.getType(), response.getId())); + private static boolean hasResponseFromRequest(MultiTermVectorsItemResponse response, Set selectedItems) { + return selectedItems.contains(new Item(response.getIndex(), response.getType(), response.getId())); } } diff --git a/core/src/main/java/org/elasticsearch/indices/IndicesModule.java b/core/src/main/java/org/elasticsearch/indices/IndicesModule.java index 759c9e5e150..24e5a3262e8 100644 --- a/core/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/core/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -28,6 +28,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.ExtensionPoint; import org.elasticsearch.index.query.*; import 
org.elasticsearch.index.query.functionscore.FunctionScoreQueryParser; +import org.elasticsearch.index.query.MoreLikeThisQueryParser; import org.elasticsearch.indices.analysis.HunspellService; import org.elasticsearch.indices.analysis.IndicesAnalysisService; import org.elasticsearch.indices.cache.query.IndicesQueryCache; diff --git a/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java b/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java index 58a2b3e3a29..e8b39a5d032 100644 --- a/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/SimpleIndexQueryParserTests.java @@ -21,41 +21,15 @@ package org.elasticsearch.index.query; import com.google.common.collect.Sets; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.MultiFields; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.*; import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.queries.BoostingQuery; import org.apache.lucene.queries.ExtendedCommonTermsQuery; import org.apache.lucene.queries.TermsQuery; -import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.ConstantScoreQuery; -import org.apache.lucene.search.DisjunctionMaxQuery; -import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.NumericRangeQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.QueryWrapperFilter; -import org.apache.lucene.search.RegexpQuery; -import 
org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.join.ToParentBlockJoinQuery; -import org.apache.lucene.search.spans.FieldMaskingSpanQuery; -import org.apache.lucene.search.spans.SpanContainingQuery; -import org.apache.lucene.search.spans.SpanFirstQuery; -import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; -import org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanNotQuery; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.search.spans.SpanWithinQuery; +import org.apache.lucene.search.spans.*; import org.apache.lucene.spatial.prefix.IntersectsPrefixTreeFilter; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; @@ -63,12 +37,10 @@ import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; import org.elasticsearch.action.termvectors.MultiTermVectorsItemResponse; -import org.elasticsearch.action.termvectors.MultiTermVectorsRequest; import org.elasticsearch.action.termvectors.MultiTermVectorsResponse; import org.elasticsearch.action.termvectors.TermVectorsRequest; import org.elasticsearch.action.termvectors.TermVectorsResponse; import org.elasticsearch.cluster.metadata.MetaData; -import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.lucene.search.MoreLikeThisQuery; @@ -87,6 +59,7 @@ import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.core.NumberFieldMapper; +import 
org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; import org.elasticsearch.index.search.geo.GeoDistanceRangeQuery; import org.elasticsearch.index.search.geo.GeoPolygonQuery; @@ -99,10 +72,7 @@ import org.junit.Before; import org.junit.Test; import java.io.IOException; -import java.util.Arrays; -import java.util.EnumSet; -import java.util.List; -import java.util.Locale; +import java.util.*; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.index.query.QueryBuilders.*; @@ -1772,15 +1742,15 @@ public class SimpleIndexQueryParserTests extends ESSingleNodeTestCase { } @Override - public MultiTermVectorsResponse fetchResponse(MultiTermVectorsRequest items) throws IOException { + public MultiTermVectorsResponse fetchResponse(List items, List unlikeItems, SearchContext searchContext) throws IOException { MultiTermVectorsItemResponse[] responses = new MultiTermVectorsItemResponse[items.size()]; int i = 0; - for (TermVectorsRequest item : items) { + for (Item item : items) { TermVectorsResponse response = new TermVectorsResponse(item.index(), item.type(), item.id()); response.setExists(true); - Fields generatedFields = generateFields(item.selectedFields().toArray(Strings.EMPTY_ARRAY), item.id()); + Fields generatedFields = generateFields(item.fields(), item.id()); EnumSet flags = EnumSet.of(TermVectorsRequest.Flag.Positions, TermVectorsRequest.Flag.Offsets); - response.setFields(generatedFields, item.selectedFields(), flags, generatedFields); + response.setFields(generatedFields, new HashSet(Arrays.asList(item.fields())), flags, generatedFields); responses[i++] = new MultiTermVectorsItemResponse(response, null); } return new MultiTermVectorsResponse(responses); diff --git a/core/src/test/java/org/elasticsearch/search/morelikethis/ItemSerializationTests.java 
b/core/src/test/java/org/elasticsearch/search/morelikethis/ItemSerializationTests.java index ab06a3d5d51..5f5f42aa7b2 100644 --- a/core/src/test/java/org/elasticsearch/search/morelikethis/ItemSerializationTests.java +++ b/core/src/test/java/org/elasticsearch/search/morelikethis/ItemSerializationTests.java @@ -20,71 +20,28 @@ package org.elasticsearch.search.morelikethis; import com.carrotsearch.randomizedtesting.generators.RandomPicks; -import org.elasticsearch.action.get.MultiGetRequest; -import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.xcontent.ToXContent; -import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.VersionType; -import org.elasticsearch.index.query.MoreLikeThisQueryBuilder; import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item; -import org.elasticsearch.search.fetch.source.FetchSourceContext; import org.elasticsearch.test.ESTestCase; import org.junit.Test; -import java.io.IOException; -import java.util.List; import java.util.Random; -import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath; -import static org.hamcrest.Matchers.is; - public class ItemSerializationTests extends ESTestCase { private Item generateRandomItem(int arraySize, int stringSize) { String index = randomAsciiOfLength(stringSize); String type = randomAsciiOfLength(stringSize); String id = String.valueOf(Math.abs(randomInt())); - String routing = randomBoolean() ? randomAsciiOfLength(stringSize) : null; String[] fields = generateRandomStringArray(arraySize, stringSize, true); - + String routing = randomBoolean() ? 
randomAsciiOfLength(stringSize) : null; long version = Math.abs(randomLong()); VersionType versionType = RandomPicks.randomFrom(new Random(), VersionType.values()); - - FetchSourceContext fetchSourceContext; - switch (randomIntBetween(0, 3)) { - case 0 : - fetchSourceContext = new FetchSourceContext(randomBoolean()); - break; - case 1 : - fetchSourceContext = new FetchSourceContext(generateRandomStringArray(arraySize, stringSize, true)); - break; - case 2 : - fetchSourceContext = new FetchSourceContext(generateRandomStringArray(arraySize, stringSize, true), - generateRandomStringArray(arraySize, stringSize, true)); - break; - default: - fetchSourceContext = null; - break; - } - return (Item) new Item(index, type, id).routing(routing).fields(fields).version(version).versionType(versionType) - .fetchSourceContext(fetchSourceContext); - } - - private String ItemToJSON(Item item) throws IOException { - XContentBuilder builder = XContentFactory.jsonBuilder(); - builder.startObject(); - builder.startArray("docs"); - item.toXContent(builder, ToXContent.EMPTY_PARAMS); - builder.endArray(); - builder.endObject(); - return XContentHelper.convertToJson(builder.bytes(), false); - } - - private MultiGetRequest.Item JSONtoItem(String json) throws Exception { - MultiGetRequest request = new MultiGetRequest().add(null, null, null, null, new BytesArray(json), true); - return request.getItems().get(0); + return new Item(index, type, id).fields(fields).routing(routing).version(version).versionType(versionType); } @Test @@ -94,66 +51,10 @@ public class ItemSerializationTests extends ESTestCase { int maxStringSize = 8; for (int i = 0; i < numOfTrials; i++) { Item item1 = generateRandomItem(maxArraySize, maxStringSize); - String json = ItemToJSON(item1); - MultiGetRequest.Item item2 = JSONtoItem(json); + String json = item1.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS).string(); + XContentParser parser = XContentFactory.xContent(json).createParser(json); + Item 
item2 = Item.parse(parser, ParseFieldMatcher.STRICT, new Item()); assertEquals(item1, item2); } } - - private List testItemsFromJSON(String json) throws Exception { - MultiGetRequest request = new MultiGetRequest(); - request.add(null, null, null, null, new BytesArray(json), true); - List items = request.getItems(); - - assertEquals(items.size(), 3); - for (MultiGetRequest.Item item : items) { - assertThat(item.index(), is("test")); - assertThat(item.type(), is("type")); - FetchSourceContext fetchSource = item.fetchSourceContext(); - switch (item.id()) { - case "1" : - assertThat(fetchSource.fetchSource(), is(false)); - break; - case "2" : - assertThat(fetchSource.fetchSource(), is(true)); - assertThat(fetchSource.includes(), is(new String[]{"field3", "field4"})); - break; - case "3" : - assertThat(fetchSource.fetchSource(), is(true)); - assertThat(fetchSource.includes(), is(new String[]{"user"})); - assertThat(fetchSource.excludes(), is(new String[]{"user.location"})); - break; - default: - fail("item with id: " + item.id() + " is not 1, 2 or 3"); - break; - } - } - return items; - } - - @Test - public void testSimpleItemSerializationFromFile() throws Exception { - // test items from JSON - List itemsFromJSON = testItemsFromJSON( - copyToStringFromClasspath("/org/elasticsearch/search/morelikethis/items.json")); - - // create builder from items - XContentBuilder builder = XContentFactory.jsonBuilder(); - builder.startObject(); - builder.startArray("docs"); - for (MultiGetRequest.Item item : itemsFromJSON) { - MoreLikeThisQueryBuilder.Item itemForBuilder = (MoreLikeThisQueryBuilder.Item) new MoreLikeThisQueryBuilder.Item( - item.index(), item.type(), item.id()) - .fetchSourceContext(item.fetchSourceContext()) - .fields(item.fields()); - itemForBuilder.toXContent(builder, ToXContent.EMPTY_PARAMS); - } - builder.endArray(); - builder.endObject(); - - // verify generated JSON lead to the same items - String json = XContentHelper.convertToJson(builder.bytes(), false); - 
testItemsFromJSON(json); - } - } diff --git a/core/src/test/java/org/elasticsearch/search/morelikethis/MoreLikeThisIT.java b/core/src/test/java/org/elasticsearch/search/morelikethis/MoreLikeThisIT.java index 52df8932291..bbc992f75ae 100644 --- a/core/src/test/java/org/elasticsearch/search/morelikethis/MoreLikeThisIT.java +++ b/core/src/test/java/org/elasticsearch/search/morelikethis/MoreLikeThisIT.java @@ -43,8 +43,8 @@ import static org.elasticsearch.client.Requests.*; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; -import static org.elasticsearch.index.query.QueryBuilders.termQuery; import static org.elasticsearch.index.query.QueryBuilders.moreLikeThisQuery; +import static org.elasticsearch.index.query.QueryBuilders.termQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.notNullValue; @@ -57,11 +57,11 @@ public class MoreLikeThisIT extends ESIntegTestCase { @Test public void testSimpleMoreLikeThis() throws Exception { logger.info("Creating index test"); - assertAcked(prepareCreate("test").addMapping("type1", + assertAcked(prepareCreate("test").addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties") - .startObject("text").field("type", "string").endObject() - .endObject().endObject().endObject())); - + .startObject("text").field("type", "string").endObject() + .endObject().endObject().endObject())); + logger.info("Running Cluster Health"); assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); @@ -72,11 +72,10 @@ public class MoreLikeThisIT extends ESIntegTestCase { logger.info("Running moreLikeThis"); SearchResponse response = client().prepareSearch().setQuery( - new 
MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 1l); } - - + @Test public void testSimpleMoreLikeOnLongField() throws Exception { logger.info("Creating index test"); @@ -89,23 +88,21 @@ public class MoreLikeThisIT extends ESIntegTestCase { client().index(indexRequest("test").type("type2").id("2").source(jsonBuilder().startObject().field("some_long", 0).endObject())).actionGet(); client().index(indexRequest("test").type("type1").id("3").source(jsonBuilder().startObject().field("some_long", -666).endObject())).actionGet(); - client().admin().indices().refresh(refreshRequest()).actionGet(); logger.info("Running moreLikeThis"); SearchResponse response = client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 0l); } - @Test public void testMoreLikeThisWithAliases() throws Exception { logger.info("Creating index test"); - assertAcked(prepareCreate("test").addMapping("type1", + assertAcked(prepareCreate("test").addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties") - .startObject("text").field("type", "string").endObject() - .endObject().endObject().endObject())); + .startObject("text").field("type", "string").endObject() + .endObject().endObject().endObject())); logger.info("Creating aliases alias release"); client().admin().indices().aliases(indexAliasesRequest().addAlias("release", termQuery("text", "release"), "test")).actionGet(); client().admin().indices().aliases(indexAliasesRequest().addAlias("beta", termQuery("text", "beta"), "test")).actionGet(); @@ -122,27 +119,26 @@ public class MoreLikeThisIT extends 
ESIntegTestCase { logger.info("Running moreLikeThis on index"); SearchResponse response = client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 2l); logger.info("Running moreLikeThis on beta shard"); response = client().prepareSearch("beta").setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 1l); assertThat(response.getHits().getAt(0).id(), equalTo("3")); logger.info("Running moreLikeThis on release shard"); response = client().prepareSearch("release").setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 1l); assertThat(response.getHits().getAt(0).id(), equalTo("2")); logger.info("Running moreLikeThis on alias with node client"); response = internalCluster().clientNodeClient().prepareSearch("beta").setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 1l); assertThat(response.getHits().getAt(0).id(), equalTo("3")); - } @Test @@ -160,11 +156,11 @@ public class MoreLikeThisIT extends ESIntegTestCase { assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); SearchResponse response = client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("foo", "bar", "1"))).get(); + new 
MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1"))).get(); assertNoFailures(response); assertThat(response, notNullValue()); response = client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("foo", "bar", "1"))).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1"))).get(); assertNoFailures(response); assertThat(response, notNullValue()); } @@ -186,7 +182,7 @@ public class MoreLikeThisIT extends ESIntegTestCase { client().admin().indices().prepareRefresh("foo").execute().actionGet(); SearchResponse response = client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder().addItem((Item) new Item("foo", "bar", "1").routing("2"))).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1").routing("2"))).get(); assertNoFailures(response); assertThat(response, notNullValue()); } @@ -209,7 +205,7 @@ public class MoreLikeThisIT extends ESIntegTestCase { .execute().actionGet(); client().admin().indices().prepareRefresh("foo").execute().actionGet(); SearchResponse response = client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder().addItem((Item) new Item("foo", "bar", "1").routing("4000"))).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1").routing("4000"))).get(); assertNoFailures(response); assertThat(response, notNullValue()); } @@ -237,12 +233,12 @@ public class MoreLikeThisIT extends ESIntegTestCase { // Implicit list of fields -> ignore numeric fields SearchResponse searchResponse = client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(searchResponse, 1l); // Explicit list of fields including numeric fields -> fail assertThrows(client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder("string_value", 
"int_value").addItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class); + new MoreLikeThisQueryBuilder("string_value", "int_value").addLikeItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class); // mlt query with no field -> OK searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery().likeText("index").minTermFreq(1).minDocFreq(1)).execute().actionGet(); @@ -299,16 +295,16 @@ public class MoreLikeThisIT extends ESIntegTestCase { logger.info("Running More Like This with include true"); SearchResponse response = client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get(); assertOrderedSearchHits(response, "1", "2"); response = client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "2")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "2")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get(); assertOrderedSearchHits(response, "2", "1"); logger.info("Running More Like This with include false"); response = client().prepareSearch().setQuery( - new MoreLikeThisQueryBuilder().addItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%")).get(); + new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%")).get(); assertSearchHits(response, "2"); } @@ -359,7 +355,7 @@ public class MoreLikeThisIT extends ESIntegTestCase { logger.info("Running MoreLikeThis"); MoreLikeThisQueryBuilder queryBuilder = 
QueryBuilders.moreLikeThisQuery("text").include(true).minTermFreq(1).minDocFreq(1) - .addItem(new MoreLikeThisQueryBuilder.Item("test", "type0", "0")); + .addLikeItem(new Item("test", "type0", "0")); String[] types = new String[numOfTypes]; for (int i = 0; i < numOfTypes; i++) { @@ -389,8 +385,7 @@ public class MoreLikeThisIT extends ESIntegTestCase { indexRandom(true, builders); int maxIters = randomIntBetween(10, 20); - for (int i = 0; i < maxIters; i++) - { + for (int i = 0; i < maxIters; i++) { int max_query_terms = randomIntBetween(1, values.length); logger.info("Running More Like This with max_query_terms = %s", max_query_terms); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery("text").ids("0").minTermFreq(1).minDocFreq(1) @@ -451,14 +446,14 @@ public class MoreLikeThisIT extends ESIntegTestCase { logger.info("Indexing a single document ..."); XContentBuilder doc = jsonBuilder().startObject(); for (int i = 0; i < numFields; i++) { - doc.field("field"+i, generateRandomStringArray(5, 10, false)+"a"); // make sure they are not all empty + doc.field("field" + i, generateRandomStringArray(5, 10, false) + "a"); // make sure they are not all empty } doc.endObject(); indexRandom(true, client().prepareIndex("test", "type1", "0").setSource(doc)); logger.info("Checking the document matches ..."); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery() - .like((Item) new Item().doc(doc).index("test").type("type1").routing("0")) // routing to ensure we hit the shard with the doc + .like(new Item("test", "type1", doc).routing("0")) // routing to ensure we hit the shard with the doc .minTermFreq(0) .minDocFreq(0) .maxQueryTerms(100) @@ -479,18 +474,18 @@ public class MoreLikeThisIT extends ESIntegTestCase { logger.info("Creating an index with a single document ..."); indexRandom(true, client().prepareIndex("test", "type1", "1").setSource(jsonBuilder() .startObject() - .field("text", "Hello World!") - .field("date", "2009-01-01") + .field("text", "Hello World!") + 
.field("date", "2009-01-01") .endObject())); logger.info("Checking with a malformed field value ..."); XContentBuilder malformedFieldDoc = jsonBuilder() .startObject() - .field("text", "Hello World!") - .field("date", "this is not a date!") + .field("text", "Hello World!") + .field("date", "this is not a date!") .endObject(); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery() - .like((Item) new Item().doc(malformedFieldDoc).index("test").type("type1")) + .like(new Item("test", "type1", malformedFieldDoc)) .minTermFreq(0) .minDocFreq(0) .minimumShouldMatch("0%"); @@ -502,7 +497,7 @@ public class MoreLikeThisIT extends ESIntegTestCase { logger.info("Checking with an empty document ..."); XContentBuilder emptyDoc = jsonBuilder().startObject().endObject(); mltQuery = moreLikeThisQuery() - .like((Item) new Item().doc(emptyDoc).index("test").type("type1")) + .like(new Item("test", "type1", emptyDoc)) .minTermFreq(0) .minDocFreq(0) .minimumShouldMatch("0%"); @@ -514,7 +509,7 @@ public class MoreLikeThisIT extends ESIntegTestCase { logger.info("Checking when document is malformed ..."); XContentBuilder malformedDoc = jsonBuilder().startObject(); mltQuery = moreLikeThisQuery() - .like((Item) new Item().doc(malformedDoc).index("test").type("type1")) + .like(new Item("test", "type1", malformedDoc)) .minTermFreq(0) .minDocFreq(0) .minimumShouldMatch("0%"); @@ -526,11 +521,11 @@ public class MoreLikeThisIT extends ESIntegTestCase { logger.info("Checking the document matches otherwise ..."); XContentBuilder normalDoc = jsonBuilder() .startObject() - .field("text", "Hello World!") - .field("date", "1000-01-01") // should be properly parsed but ignored ... + .field("text", "Hello World!") + .field("date", "1000-01-01") // should be properly parsed but ignored ... 
.endObject(); mltQuery = moreLikeThisQuery() - .like((Item) new Item().doc(normalDoc).index("test").type("type1")) + .like(new Item("test", "type1", normalDoc)) .minTermFreq(0) .minDocFreq(0) .minimumShouldMatch("100%"); // strict all terms must match but date is ignored @@ -541,7 +536,7 @@ public class MoreLikeThisIT extends ESIntegTestCase { } @Test - public void testMoreLikeThisIgnoreLike() throws ExecutionException, InterruptedException, IOException { + public void testMoreLikeThisUnlike() throws ExecutionException, InterruptedException, IOException { createIndex("test"); ensureGreen(); int numFields = randomIntBetween(5, 10); @@ -561,8 +556,8 @@ public class MoreLikeThisIT extends ESIntegTestCase { indexRandom(true, builders); logger.info("First check the document matches all indexed docs."); - MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery("field0") - .like((Item) new Item().doc(doc).index("test").type("type1")) + MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery() + .like(new Item("test", "type1", doc)) .minTermFreq(0) .minDocFreq(0) .maxQueryTerms(100) @@ -577,11 +572,12 @@ public class MoreLikeThisIT extends ESIntegTestCase { for (int i = 0; i < numFields; i++) { docs.add(new Item("test", "type1", i+"")); mltQuery = moreLikeThisQuery() - .like((Item) new Item().doc(doc).index("test").type("type1")) + .like(new Item("test", "type1", doc)) .ignoreLike(docs.toArray(Item.EMPTY_ARRAY)) .minTermFreq(0) .minDocFreq(0) .maxQueryTerms(100) + .include(true) .minimumShouldMatch("0%"); response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get(); assertSearchResponse(response); @@ -628,5 +624,4 @@ public class MoreLikeThisIT extends ESIntegTestCase { assertSearchResponse(response); assertHitCount(response, 1); } - } diff --git a/core/src/test/java/org/elasticsearch/transport/ContextAndHeaderTransportIT.java b/core/src/test/java/org/elasticsearch/transport/ContextAndHeaderTransportIT.java index 8aff7ea5280..a2433a94d8e 100644 --- 
a/core/src/test/java/org/elasticsearch/transport/ContextAndHeaderTransportIT.java +++ b/core/src/test/java/org/elasticsearch/transport/ContextAndHeaderTransportIT.java @@ -52,6 +52,7 @@ import org.elasticsearch.http.HttpServerTransport; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.GeoShapeQueryBuilder; import org.elasticsearch.index.query.MoreLikeThisQueryBuilder; +import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.TermsLookupQueryBuilder; import org.elasticsearch.plugins.Plugin; @@ -229,7 +230,7 @@ public class ContextAndHeaderTransportIT extends ESIntegTestCase { transportClient().admin().indices().prepareRefresh(lookupIndex, queryIndex).get(); MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = QueryBuilders.moreLikeThisQuery("name") - .addItem(new MoreLikeThisQueryBuilder.Item(lookupIndex, "type", "1")) + .addLikeItem(new Item(lookupIndex, "type", "1")) .minTermFreq(1) .minDocFreq(1); diff --git a/docs/reference/migration/migrate_2_1.asciidoc b/docs/reference/migration/migrate_2_1.asciidoc index a530fc1193d..f3008aaaeab 100644 --- a/docs/reference/migration/migrate_2_1.asciidoc +++ b/docs/reference/migration/migrate_2_1.asciidoc @@ -41,4 +41,12 @@ thought of as a delete operation followed by an index operation. ==== `indices.fielddata.cache.expire` The experimental feature `indices.fielddata.cache.expire` has been removed. -For indices that have this setting configured, this config will be ignored. \ No newline at end of file +For indices that have this setting configured, this config will be ignored. + +=== More Like This + +The MoreLikeThisQueryBuilder#ignoreLike methods have been deprecated in favor +of using the unlike methods. + +MoreLikeThisQueryBuilder#addItem has been deprecated in favor of using +MoreLikeThisQueryBuilder#addLikeItem. \ No newline at end of file