mirror of
synced 2025-02-24 05:44:59 +00:00
Refactors MoreLikeThisQueryBuilder and Parser
Relates to #10217. This PR is against the query-refactoring branch. Closes #13486.
This commit is contained in:
@ -350,6 +350,13 @@ public abstract class StreamInput extends InputStream {
return ret;
public String[] readOptionalStringArray() throws IOException {
if (readBoolean()) {
return readStringArray();
return null;
public Map<String, Object> readMap() throws IOException {
@ -316,6 +316,18 @@ public abstract class StreamOutput extends OutputStream {
* Writes an optional string array; for a null array, writes false.
public void writeOptionalStringArray(@Nullable String[] array) throws IOException {
if (array == null) {
} else {
public void writeMap(@Nullable Map<String, Object> map) throws IOException {
@ -18,12 +18,17 @@
package org.elasticsearch.index;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.lucene.uid.Versions;
import java.io.IOException;
public enum VersionType {
public enum VersionType implements Writeable<VersionType> {
INTERNAL((byte) 0) {
public boolean isVersionConflictForWrites(long currentVersion, long expectedVersion) {
@ -219,6 +224,8 @@ public enum VersionType {
private final byte value;
private static final VersionType PROTOTYPE = INTERNAL;
VersionType(byte value) {
this.value = value;
@ -304,4 +311,20 @@ public enum VersionType {
throw new IllegalArgumentException("No version type match [" + value + "]");
public VersionType readFrom(StreamInput in) throws IOException {
int ordinal = in.readVInt();
assert (ordinal == 0 || ordinal == 1 || ordinal == 2 || ordinal == 3);
return VersionType.values()[ordinal];
public static VersionType readVersionTypeFrom(StreamInput in) throws IOException {
return PROTOTYPE.readFrom(in);
public void writeTo(StreamOutput out) throws IOException {
@ -19,25 +19,40 @@
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.termvectors.TermVectorsRequest;
import org.elasticsearch.action.termvectors.*;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
import org.elasticsearch.common.lucene.search.XMoreLikeThis;
import org.elasticsearch.common.lucene.uid.Versions;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.*;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.*;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.mapper.Uid.createUidAsBytes;
* A more like this query that finds documents that are "like" the provided set of document(s).
@ -46,10 +61,50 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
public static final String NAME = "mlt";
public static final int DEFAULT_MAX_QUERY_TERMS = XMoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
public static final int DEFAULT_MIN_TERM_FREQ = XMoreLikeThis.DEFAULT_MIN_TERM_FREQ;
public static final int DEFAULT_MIN_DOC_FREQ = XMoreLikeThis.DEFAULT_MIN_DOC_FREQ;
public static final int DEFAULT_MAX_DOC_FREQ = XMoreLikeThis.DEFAULT_MAX_DOC_FREQ;
public static final int DEFAULT_MIN_WORD_LENGTH = XMoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
public static final int DEFAULT_MAX_WORD_LENGTH = XMoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
public static final float DEFAULT_BOOST_TERMS = 0; // no boost terms
public static final boolean DEFAULT_INCLUDE = false;
public static final boolean DEFAULT_FAIL_ON_UNSUPPORTED_FIELDS = true;
// document inputs
private final List<String> fields;
private List<String> likeTexts = new ArrayList<>();
private List<String> unlikeTexts = new ArrayList<>();
private List<Item> likeItems = new ArrayList<>();
private List<Item> unlikeItems = new ArrayList<>();
// term selection parameters
private int maxQueryTerms = DEFAULT_MAX_QUERY_TERMS;
private int minTermFreq = DEFAULT_MIN_TERM_FREQ;
private int minDocFreq = DEFAULT_MIN_DOC_FREQ;
private int maxDocFreq = DEFAULT_MAX_DOC_FREQ;
private int minWordLength = DEFAULT_MIN_WORD_LENGTH;
private int maxWordLength = DEFAULT_MAX_WORD_LENGTH;
private String[] stopWords;
private String analyzer;
// query formation parameters
private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
private float boostTerms = DEFAULT_BOOST_TERMS;
private boolean include = DEFAULT_INCLUDE;
// other parameters
private boolean failOnUnsupportedField = DEFAULT_FAIL_ON_UNSUPPORTED_FIELDS;
static final MoreLikeThisQueryBuilder PROTOTYPE = new MoreLikeThisQueryBuilder();
* A single item to be used for a {@link MoreLikeThisQueryBuilder}.
public static final class Item implements ToXContent {
public static final class Item implements ToXContent, Writeable<Item> {
public static final Item[] EMPTY_ARRAY = new Item[0];
public interface Field {
@ -74,6 +129,8 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
private long version = Versions.MATCH_ANY;
private VersionType versionType = VersionType.INTERNAL;
static final Item PROTOTYPE = new Item();
public Item() {
@ -85,7 +142,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
* @param type the type of the document
* @param id and its id
public Item(String index, @Nullable String type, String id) {
public Item(@Nullable String index, @Nullable String type, String id) {
if (id == null) {
throw new IllegalArgumentException("Item requires id to be non-null");
this.index = index;
this.type = type;
this.id = id;
@ -98,10 +158,13 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
* @param type the type to be used for parsing the doc
* @param doc the document specification
public Item(String index, String type, XContentBuilder doc) {
public Item(@Nullable String index, @Nullable String type, XContentBuilder doc) {
if (doc == null) {
throw new IllegalArgumentException("Item requires doc to be non-null");
this.index = index;
this.type = type;
this.doc = doc.bytes();
public String index() {
@ -126,30 +189,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
return id;
public Item id(String id) {
this.id = id;
return this;
public BytesReference doc() {
return doc;
* Sets this item to a given artificial document, that is, a document that is not present in the index.
public Item doc(BytesReference doc) {
this.doc = doc;
return this;
* Sets this item to a given artificial document, that is, a document that is not present in the index.
public Item doc(XContentBuilder doc) {
return this.doc(doc.bytes());
public String[] fields() {
return fields;
@ -217,7 +260,7 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
// for artificial docs to make sure that the id has changed in the item too
if (doc != null) {
termVectorsRequest.doc(doc, true);
this.id = termVectorsRequest.id();
return termVectorsRequest;
@ -239,7 +282,7 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
} else if (parseFieldMatcher.match(currentFieldName, Field.ID)) {
item.id = parser.text();
} else if (parseFieldMatcher.match(currentFieldName, Field.DOC)) {
item.doc = jsonBuilder().copyCurrentStructure(parser).bytes();
} else if (parseFieldMatcher.match(currentFieldName, Field.FIELDS)) {
if (token == XContentParser.Token.START_ARRAY) {
List<String> fields = new ArrayList<>();
@ -270,6 +313,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
throw new ElasticsearchParseException(
"failed to parse More Like This item. either [id] or [doc] can be specified, but not both!");
if (item.id == null && item.doc == null) {
throw new ElasticsearchParseException(
"failed to parse More Like This item. neither [id] nor [doc] is specified!");
return item;
@ -282,7 +329,7 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
if (this.type != null) {
builder.field(Field.TYPE.getPreferredName(), this.type);
if (this.id != null && this.doc == null) {
if (this.id != null) {
builder.field(Field.ID.getPreferredName(), this.id);
if (this.doc != null) {
@ -326,6 +373,45 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
public Item readFrom(StreamInput in) throws IOException {
Item item = new Item();
item.index = in.readOptionalString();
item.type = in.readOptionalString();
if (in.readBoolean()) {
item.doc = (BytesReference) in.readGenericValue();
} else {
item.id = in.readString();
item.fields = in.readOptionalStringArray();
item.perFieldAnalyzer = (Map<String, String>) in.readGenericValue();
item.routing = in.readOptionalString();
item.version = in.readLong();
item.versionType = VersionType.readVersionTypeFrom(in);
return item;
public static Item readItemFrom(StreamInput in) throws IOException {
return PROTOTYPE.readFrom(in);
public void writeTo(StreamOutput out) throws IOException {
out.writeBoolean(doc != null);
if (doc != null) {
} else {
public int hashCode() {
return Objects.hash(index, type, id, doc, Arrays.hashCode(fields), perFieldAnalyzer, routing,
@ -349,36 +435,6 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
public static final String NAME = "mlt";
// document inputs
private List<String> likeTexts = new ArrayList<>();
private List<String> unlikeTexts = new ArrayList<>();
private List<Item> likeItems = new ArrayList<>();
private List<Item> unlikeItems = new ArrayList<>();
private final String[] fields;
// term selection parameters
private int maxQueryTerms = -1;
private int minTermFreq = -1;
private int minDocFreq = -1;
private int maxDocFreq = -1;
private int minWordLength = -1;
private int maxWordLength = -1;
private String[] stopWords = null;
private String analyzer;
// query formation parameters
private String minimumShouldMatch = null;
private float boostTerms = -1;
private Boolean include = null;
// other parameters
private Boolean failOnUnsupportedField;
static final MoreLikeThisQueryBuilder PROTOTYPE = new MoreLikeThisQueryBuilder();
* Constructs a new more like this query which uses the "_all" field.
@ -392,17 +448,34 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
* @param fields the field names that will be used when generating the 'More Like This' query.
public MoreLikeThisQueryBuilder(String... fields) {
* Sets the field names that will be used when generating the 'More Like This' query.
* @param fields the field names that will be used when generating the 'More Like This' query.
public MoreLikeThisQueryBuilder(List<String> fields) {
this.fields = fields;
public List<String> fields() {
return fields;
* Sets the text to use in order to find documents that are "like" this.
* @param likeTexts the text to use when generating the 'More Like This' query.
public MoreLikeThisQueryBuilder like(String... likeTexts) {
this.likeTexts = new ArrayList<>();
return addLikeText(likeTexts);
this.likeTexts = Collections.unmodifiableList(Arrays.asList(likeTexts));
return this;
public List<String> likeTexts() {
return likeTexts;
@ -411,56 +484,36 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
* @param likeItems the documents to use when generating the 'More Like This' query.
public MoreLikeThisQueryBuilder like(Item... likeItems) {
this.likeItems = new ArrayList<>();
return addLikeItem(likeItems);
* Adds some text to use in order to find documents that are "like" this.
public MoreLikeThisQueryBuilder addLikeText(String... likeTexts) {
Collections.addAll(this.likeTexts, likeTexts);
this.likeItems = Collections.unmodifiableList(Arrays.asList(likeItems));
return this;
* Adds a document to use in order to find documents that are "like" this.
public MoreLikeThisQueryBuilder addLikeItem(Item... likeItems) {
Collections.addAll(this.likeItems, likeItems);
return this;
public List<Item> likeItems() {
return likeItems;
* Sets the text from which the terms should not be selected.
public MoreLikeThisQueryBuilder unlike(String... unlikeTexts) {
this.unlikeTexts = new ArrayList<>();
return addUnlikeText(unlikeTexts);
this.unlikeTexts = Collections.unmodifiableList(Arrays.asList(unlikeTexts));
return this;
public List<String> unlikeTexts() {
return unlikeTexts;
* Sets the documents from which the terms should not be selected.
public MoreLikeThisQueryBuilder unlike(Item... unlikeItems) {
this.unlikeItems = new ArrayList<>();
return addUnlikeItem(unlikeItems);
* Adds some text to use in order to find documents that are "unlike" this.
public MoreLikeThisQueryBuilder addUnlikeText(String... unlikeTexts) {
Collections.addAll(this.unlikeTexts, unlikeTexts);
this.unlikeItems = Collections.unmodifiableList(Arrays.asList(unlikeItems));
return this;
* Adds a document to use in order to find documents that are "unlike" this.
public MoreLikeThisQueryBuilder addUnlikeItem(Item... unlikeItems) {
Collections.addAll(this.unlikeItems, unlikeItems);
return this;
public List<Item> unlikeItems() {
return unlikeItems;
@ -472,6 +525,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
return this;
public int maxQueryTerms() {
return maxQueryTerms;
* The frequency below which terms will be ignored in the source doc. The default
* frequency is <tt>2</tt>.
@ -481,6 +538,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
return this;
public int minTermFreq() {
return minTermFreq;
* Sets the frequency at which words will be ignored which do not occur in at least this
* many docs. Defaults to <tt>5</tt>.
@ -490,6 +551,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
return this;
public int minDocFreq() {
return minDocFreq;
* Set the maximum frequency in which words may still appear. Words that appear
* in more than this many docs will be ignored. Defaults to unbounded.
@ -499,6 +564,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
return this;
public int maxDocFreq() {
return maxDocFreq;
* Sets the minimum word length below which words will be ignored. Defaults
* to <tt>0</tt>.
@ -508,6 +577,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
return this;
public int minWordLength() {
return minWordLength;
* Sets the maximum word length above which words will be ignored. Defaults to
* unbounded (<tt>0</tt>).
@ -517,6 +590,34 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
return this;
public int maxWordLength() {
return maxWordLength;
* Set the set of stopwords.
* <p/>
* <p>Any word in this set is considered "uninteresting" and ignored. Even if your Analyzer allows stopwords, you
* might want to tell the MoreLikeThis code to ignore them, as for the purposes of document similarity it seems
* reasonable to assume that "a stop word is never interesting".
public MoreLikeThisQueryBuilder stopWords(String... stopWords) {
this.stopWords = stopWords;
return this;
public MoreLikeThisQueryBuilder stopWords(List<String> stopWords) {
if (stopWords == null) {
throw new IllegalArgumentException("requires stopwords to be non-null");
this.stopWords = stopWords.toArray(new String[stopWords.size()]);
return this;
public String[] stopWords() {
return stopWords;
* The analyzer that will be used to analyze the text. Defaults to the analyzer associated with the field.
@ -525,6 +626,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
return this;
public String analyzer() {
return analyzer;
* Number of terms that must match the generated query expressed in the
* common syntax for minimum should match. Defaults to <tt>30%</tt>.
@ -532,18 +637,29 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
* @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
public MoreLikeThisQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
if (minimumShouldMatch == null) {
throw new IllegalArgumentException("[" + NAME + "] requires minimum should match to be non-null");
this.minimumShouldMatch = minimumShouldMatch;
return this;
public String minimumShouldMatch() {
return minimumShouldMatch;
* Sets the boost factor to use when boosting terms. Defaults to <tt>1</tt>.
* Sets the boost factor to use when boosting terms. Defaults to <tt>0</tt> (deactivated).
public MoreLikeThisQueryBuilder boostTerms(float boostTerms) {
this.boostTerms = boostTerms;
return this;
public float boostTerms() {
return boostTerms;
* Whether to include the input documents. Defaults to <tt>false</tt>
@ -552,14 +668,22 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
return this;
public boolean include() {
return include;
* Whether to fail or return no result when this query is run against a field which is not supported such as binary/numeric fields.
public MoreLikeThisQueryBuilder failOnUnsupportedField(boolean fail) {
failOnUnsupportedField = fail;
this.failOnUnsupportedField = fail;
return this;
public boolean failOnUnsupportedField() {
return failOnUnsupportedField;
* The text to use in order to find documents that are "like" this.
@ -577,89 +701,32 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
return like(items);
public MoreLikeThisQueryBuilder docs(Item... docs) {
return like(docs);
* Sets the documents from which the terms should not be selected.
* @deprecated Use {@link #unlike(Item...)} instead.
public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
return unlike(docs);
* Sets the text from which the terms should not be selected.
* @deprecated Use {@link #unlike(String...)} instead.
public MoreLikeThisQueryBuilder ignoreLike(String... likeText) {
return unlike(likeText);
* Adds a document to use in order to find documents that are "like" this.
public MoreLikeThisQueryBuilder addItem(Item... likeItems) {
return addLikeItem(likeItems);
protected void doXContent(XContentBuilder builder, Params params) throws IOException {
if (fields != null) {
builder.field(MoreLikeThisQueryParser.Field.FIELDS.getPreferredName(), fields);
if (this.likeTexts.isEmpty() && this.likeItems.isEmpty()) {
throw new IllegalArgumentException("more_like_this requires '" + MoreLikeThisQueryParser.Field.LIKE.getPreferredName() + "' to be provided");
} else {
buildLikeField(builder, MoreLikeThisQueryParser.Field.LIKE.getPreferredName(), likeTexts, likeItems);
buildLikeField(builder, MoreLikeThisQueryParser.Field.LIKE.getPreferredName(), likeTexts, likeItems);
if (!unlikeTexts.isEmpty() || !unlikeItems.isEmpty()) {
buildLikeField(builder, MoreLikeThisQueryParser.Field.UNLIKE.getPreferredName(), unlikeTexts, unlikeItems);
if (maxQueryTerms != -1) {
builder.field(MoreLikeThisQueryParser.Field.MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms);
if (minTermFreq != -1) {
builder.field(MoreLikeThisQueryParser.Field.MIN_TERM_FREQ.getPreferredName(), minTermFreq);
if (minDocFreq != -1) {
builder.field(MoreLikeThisQueryParser.Field.MIN_DOC_FREQ.getPreferredName(), minDocFreq);
if (maxDocFreq != -1) {
builder.field(MoreLikeThisQueryParser.Field.MAX_DOC_FREQ.getPreferredName(), maxDocFreq);
if (minWordLength != -1) {
builder.field(MoreLikeThisQueryParser.Field.MIN_WORD_LENGTH.getPreferredName(), minWordLength);
if (maxWordLength != -1) {
builder.field(MoreLikeThisQueryParser.Field.MAX_WORD_LENGTH.getPreferredName(), maxWordLength);
if (stopWords != null && stopWords.length > 0) {
builder.field(MoreLikeThisQueryParser.Field.MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms);
builder.field(MoreLikeThisQueryParser.Field.MIN_TERM_FREQ.getPreferredName(), minTermFreq);
builder.field(MoreLikeThisQueryParser.Field.MIN_DOC_FREQ.getPreferredName(), minDocFreq);
builder.field(MoreLikeThisQueryParser.Field.MAX_DOC_FREQ.getPreferredName(), maxDocFreq);
builder.field(MoreLikeThisQueryParser.Field.MIN_WORD_LENGTH.getPreferredName(), minWordLength);
builder.field(MoreLikeThisQueryParser.Field.MAX_WORD_LENGTH.getPreferredName(), maxWordLength);
if (stopWords != null) {
builder.field(MoreLikeThisQueryParser.Field.STOP_WORDS.getPreferredName(), stopWords);
if (analyzer != null) {
builder.field(MoreLikeThisQueryParser.Field.ANALYZER.getPreferredName(), analyzer);
if (minimumShouldMatch != null) {
builder.field(MoreLikeThisQueryParser.Field.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
if (boostTerms != -1) {
builder.field(MoreLikeThisQueryParser.Field.BOOST_TERMS.getPreferredName(), boostTerms);
if (include != null) {
builder.field(MoreLikeThisQueryParser.Field.INCLUDE.getPreferredName(), include);
if (failOnUnsupportedField != null) {
builder.field(MoreLikeThisQueryParser.Field.FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField);
builder.field(MoreLikeThisQueryParser.Field.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
builder.field(MoreLikeThisQueryParser.Field.BOOST_TERMS.getPreferredName(), boostTerms);
builder.field(MoreLikeThisQueryParser.Field.INCLUDE.getPreferredName(), include);
builder.field(MoreLikeThisQueryParser.Field.FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField);
@ -679,4 +746,299 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQ
public String getWriteableName() {
return NAME;
protected Query doToQuery(QueryShardContext context) throws IOException {
MoreLikeThisQuery mltQuery = new MoreLikeThisQuery();
// set similarity
// set query parameters
if (stopWords != null) {
mltQuery.setStopWords(new HashSet<>(Arrays.asList(stopWords)));
// sets boost terms
if (boostTerms != 0) {
// set analyzer
Analyzer analyzerObj = context.analysisService().analyzer(analyzer);
if (analyzerObj == null) {
analyzerObj = context.mapperService().searchAnalyzer();
// set like text fields
boolean useDefaultField = (fields == null);
List<String> moreLikeFields = new ArrayList<>();
if (useDefaultField) {
moreLikeFields = Collections.singletonList(context.defaultField());
} else {
for (String field : fields) {
MappedFieldType fieldType = context.fieldMapper(field);
moreLikeFields.add(fieldType == null ? field : fieldType.names().indexName());
// possibly remove unsupported fields
removeUnsupportedFields(moreLikeFields, analyzerObj, failOnUnsupportedField);
if (moreLikeFields.isEmpty()) {
return null;
// handle like texts
if (likeTexts.isEmpty() == false) {
if (unlikeTexts.isEmpty() == false) {
// handle items
if (likeItems.isEmpty() == false) {
return handleItems(context, mltQuery, likeItems, unlikeItems, include, moreLikeFields, useDefaultField);
} else {
return mltQuery;
private static List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
for (Iterator<String> it = moreLikeFields.iterator(); it.hasNext(); ) {
final String fieldName = it.next();
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
if (failOnUnsupportedField) {
throw new IllegalArgumentException("more_like_this doesn't support binary/numeric fields: [" + fieldName + "]");
} else {
return moreLikeFields;
private Query handleItems(QueryShardContext context, MoreLikeThisQuery mltQuery, List<Item> likeItems, List<Item> unlikeItems,
boolean include, List<String> moreLikeFields, boolean useDefaultField) throws IOException {
// set default index, type and fields if not specified
for (Item item : likeItems) {
setDefaultIndexTypeFields(context, item, moreLikeFields, useDefaultField);
for (Item item : unlikeItems) {
setDefaultIndexTypeFields(context, item, moreLikeFields, useDefaultField);
// fetching the items with multi-termvectors API
MultiTermVectorsResponse responses = fetchResponse(context.getClient(), likeItems, unlikeItems, SearchContext.current());
// getting the Fields for liked items
mltQuery.setLikeText(getFieldsFor(responses, likeItems));
// getting the Fields for unliked items
if (!unlikeItems.isEmpty()) {
org.apache.lucene.index.Fields[] unlikeFields = getFieldsFor(responses, unlikeItems);
if (unlikeFields.length > 0) {
BooleanQuery boolQuery = new BooleanQuery();
boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
// exclude the items from the search
if (!include) {
handleExclude(boolQuery, likeItems);
return boolQuery;
private static void setDefaultIndexTypeFields(QueryShardContext context, Item item, List<String> moreLikeFields,
boolean useDefaultField) {
if (item.index() == null) {
if (item.type() == null) {
if (context.queryTypes().size() > 1) {
throw new QueryShardException(context,
"ambiguous type for item with id: " + item.id() + " and index: " + item.index());
} else {
// default fields if not present but don't override for artificial docs
if ((item.fields() == null || item.fields().length == 0) && item.doc() == null) {
if (useDefaultField) {
} else {
item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
private MultiTermVectorsResponse fetchResponse(Client client, List<Item> likeItems, @Nullable List<Item> unlikeItems,
SearchContext searchContext) throws IOException {
MultiTermVectorsRequest request = new MultiTermVectorsRequest();
for (Item item : likeItems) {
if (unlikeItems != null) {
for (Item item : unlikeItems) {
return client.multiTermVectors(request).actionGet();
private static Fields[] getFieldsFor(MultiTermVectorsResponse responses, List<Item> items) throws IOException {
List<Fields> likeFields = new ArrayList<>();
Set<Item> selectedItems = new HashSet<>();
for (Item request : items) {
selectedItems.add(new Item(request.index(), request.type(), request.id()));
for (MultiTermVectorsItemResponse response : responses) {
if (!hasResponseFromRequest(response, selectedItems)) {
if (response.isFailed()) {
TermVectorsResponse getResponse = response.getResponse();
if (!getResponse.isExists()) {
return likeFields.toArray(Fields.EMPTY_ARRAY);
private static boolean hasResponseFromRequest(MultiTermVectorsItemResponse response, Set<Item> selectedItems) {
return selectedItems.contains(new Item(response.getIndex(), response.getType(), response.getId()));
private static void handleExclude(BooleanQuery boolQuery, List<Item> likeItems) {
// artificial docs get assigned a random id and should be disregarded
List<BytesRef> uids = new ArrayList<>();
for (Item item : likeItems) {
if (item.doc() != null) {
uids.add(createUidAsBytes(item.type(), item.id()));
if (!uids.isEmpty()) {
TermsQuery query = new TermsQuery(UidFieldMapper.NAME, uids.toArray(new BytesRef[0]));
boolQuery.add(query, BooleanClause.Occur.MUST_NOT);
public QueryValidationException validate() {
QueryValidationException validationException = null;
if (likeTexts.isEmpty() && likeItems.isEmpty()) {
validationException = addValidationError("requires 'like' to be specified.", validationException);
if (fields != null && fields.isEmpty()) {
validationException = addValidationError("requires 'fields' to be specified", validationException);
return validationException;
protected MoreLikeThisQueryBuilder doReadFrom(StreamInput in) throws IOException {
MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = new MoreLikeThisQueryBuilder((List<String>) in.readGenericValue());
moreLikeThisQueryBuilder.likeTexts = (List<String>) in.readGenericValue();
moreLikeThisQueryBuilder.unlikeTexts = (List<String>) in.readGenericValue();
moreLikeThisQueryBuilder.likeItems = readItems(in);
moreLikeThisQueryBuilder.unlikeItems = readItems(in);
moreLikeThisQueryBuilder.maxQueryTerms = in.readVInt();
moreLikeThisQueryBuilder.minTermFreq = in.readVInt();
moreLikeThisQueryBuilder.minDocFreq = in.readVInt();
moreLikeThisQueryBuilder.maxDocFreq = in.readVInt();
moreLikeThisQueryBuilder.minWordLength = in.readVInt();
moreLikeThisQueryBuilder.maxWordLength = in.readVInt();
moreLikeThisQueryBuilder.stopWords = in.readOptionalStringArray();
moreLikeThisQueryBuilder.analyzer = in.readOptionalString();
moreLikeThisQueryBuilder.minimumShouldMatch = in.readString();
moreLikeThisQueryBuilder.boostTerms = (Float) in.readGenericValue();
moreLikeThisQueryBuilder.include = in.readBoolean();
moreLikeThisQueryBuilder.failOnUnsupportedField = in.readBoolean();
return moreLikeThisQueryBuilder;
private static List<Item> readItems(StreamInput in) throws IOException {
List<Item> items = new ArrayList<>();
int size = in.readVInt();
for (int i = 0; i < size; i++) {
return items;
protected void doWriteTo(StreamOutput out) throws IOException {
writeItems(likeItems, out);
writeItems(unlikeItems, out);
private static void writeItems(List<Item> items, StreamOutput out) throws IOException {
for (Item item : items) {
protected int doHashCode() {
return Objects.hash(fields, likeTexts, unlikeTexts, likeItems, unlikeItems, maxQueryTerms, minTermFreq,
minDocFreq, maxDocFreq, minWordLength, maxWordLength, Arrays.hashCode(stopWords), analyzer, minimumShouldMatch,
boostTerms, include, failOnUnsupportedField);
protected boolean doEquals(MoreLikeThisQueryBuilder other) {
return Objects.equals(fields, other.fields) &&
Objects.equals(likeTexts, other.likeTexts) &&
Objects.equals(unlikeTexts, other.unlikeTexts) &&
Objects.equals(likeItems, other.likeItems) &&
Objects.equals(unlikeItems, other.unlikeItems) &&
Objects.equals(maxQueryTerms, other.maxQueryTerms) &&
Objects.equals(minTermFreq, other.minTermFreq) &&
Objects.equals(minDocFreq, other.minDocFreq) &&
Objects.equals(maxDocFreq, other.maxDocFreq) &&
Objects.equals(minWordLength, other.minWordLength) &&
Objects.equals(maxWordLength, other.maxWordLength) &&
Arrays.equals(stopWords, other.stopWords) && // otherwise we are comparing pointers
Objects.equals(analyzer, other.analyzer) &&
Objects.equals(minimumShouldMatch, other.minimumShouldMatch) &&
Objects.equals(boostTerms, other.boostTerms) &&
Objects.equals(include, other.include) &&
Objects.equals(failOnUnsupportedField, other.failOnUnsupportedField);
@ -19,43 +19,20 @@
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.action.termvectors.*;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import static org.elasticsearch.index.mapper.Uid.createUidAsBytes;
* Parser for the More Like This Query (MLT Query), which finds documents that are "like" a given set of documents.
* The documents are provided as a set of strings and/or a list of {@link Item}.
public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
public class MoreLikeThisQueryParser extends BaseQueryParser<MoreLikeThisQueryBuilder> {
public interface Field {
ParseField FIELDS = new ParseField("fields");
@ -84,23 +61,34 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
public Query parse(QueryShardContext context) throws IOException, QueryParsingException {
QueryParseContext parseContext = context.parseContext();
public MoreLikeThisQueryBuilder fromXContent(QueryParseContext parseContext) throws IOException, QueryParsingException {
XContentParser parser = parseContext.parser();
MoreLikeThisQuery mltQuery = new MoreLikeThisQuery();
// document inputs
List<String> fields = null;
List<String> likeTexts = new ArrayList<>();
List<String> unlikeTexts = new ArrayList<>();
List<Item> likeItems = new ArrayList<>();
List<Item> unlikeItems = new ArrayList<>();
List<String> moreLikeFields = null;
Analyzer analyzer = null;
boolean include = false;
// term selection parameters
int maxQueryTerms = MoreLikeThisQueryBuilder.DEFAULT_MAX_QUERY_TERMS;
int minTermFreq = MoreLikeThisQueryBuilder.DEFAULT_MIN_TERM_FREQ;
int minDocFreq = MoreLikeThisQueryBuilder.DEFAULT_MIN_DOC_FREQ;
int maxDocFreq = MoreLikeThisQueryBuilder.DEFAULT_MAX_DOC_FREQ;
int minWordLength = MoreLikeThisQueryBuilder.DEFAULT_MIN_WORD_LENGTH;
int maxWordLength = MoreLikeThisQueryBuilder.DEFAULT_MAX_WORD_LENGTH;
List<String> stopWords = null;
String analyzer = null;
boolean failOnUnsupportedField = true;
// query formation parameters
String minimumShouldMatch = MoreLikeThisQueryBuilder.DEFAULT_MINIMUM_SHOULD_MATCH;
float boostTerms = MoreLikeThisQueryBuilder.DEFAULT_BOOST_TERMS;
boolean include = MoreLikeThisQueryBuilder.DEFAULT_INCLUDE;
// other parameters
boolean failOnUnsupportedField = MoreLikeThisQueryBuilder.DEFAULT_FAIL_ON_UNSUPPORTED_FIELDS;
float boost = AbstractQueryBuilder.DEFAULT_BOOST;
String queryName = null;
XContentParser.Token token;
@ -116,37 +104,29 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE_TEXT)) {
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_QUERY_TERMS)) {
maxQueryTerms = parser.intValue();
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_TERM_FREQ)) {
minTermFreq =parser.intValue();
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_DOC_FREQ)) {
minDocFreq = parser.intValue();
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_DOC_FREQ)) {
maxDocFreq = parser.intValue();
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_WORD_LENGTH)) {
minWordLength = parser.intValue();
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_WORD_LENGTH)) {
maxWordLength = parser.intValue();
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.ANALYZER)) {
analyzer = context.analysisService().analyzer(parser.text());
analyzer = parser.text();
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MINIMUM_SHOULD_MATCH)) {
minimumShouldMatch = parser.text();
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.BOOST_TERMS)) {
float boostFactor = parser.floatValue();
if (boostFactor != 0) {
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MINIMUM_SHOULD_MATCH)) {
} else if ("analyzer".equals(currentFieldName)) {
analyzer = context.analysisService().analyzer(parser.text());
boostTerms = parser.floatValue();
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.INCLUDE)) {
include = parser.booleanValue();
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.FAIL_ON_UNSUPPORTED_FIELD)) {
failOnUnsupportedField = parser.booleanValue();
} else if ("boost".equals(currentFieldName)) {
boost = parser.floatValue();
} else if ("_name".equals(currentFieldName)) {
queryName = parser.text();
} else {
@ -154,11 +134,9 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
} else if (token == XContentParser.Token.START_ARRAY) {
if (parseContext.parseFieldMatcher().match(currentFieldName, Field.FIELDS)) {
moreLikeFields = new LinkedList<>();
fields = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
String field = parser.text();
MappedFieldType fieldType = context.fieldMapper(field);
moreLikeFields.add(fieldType == null ? field : fieldType.names().indexName());
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
@ -183,11 +161,10 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
likeItems.add(Item.parse(parser, parseContext.parseFieldMatcher(), new Item()));
} else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.STOP_WORDS)) {
Set<String> stopWords = new HashSet<>();
stopWords = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
} else {
throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
@ -205,48 +182,32 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
if (likeTexts.isEmpty() && likeItems.isEmpty()) {
throw new QueryParsingException(parseContext, "more_like_this requires 'like' to be specified");
if (moreLikeFields != null && moreLikeFields.isEmpty()) {
if (fields != null && fields.isEmpty()) {
throw new QueryParsingException(parseContext, "more_like_this requires 'fields' to be non-empty");
// set analyzer
if (analyzer == null) {
analyzer = context.mapperService().searchAnalyzer();
// set like text fields
boolean useDefaultField = (moreLikeFields == null);
if (useDefaultField) {
moreLikeFields = Collections.singletonList(context.defaultField());
// possibly remove unsupported fields
removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField);
if (moreLikeFields.isEmpty()) {
return null;
// support for named query
if (queryName != null) {
context.addNamedQuery(queryName, mltQuery);
// handle like texts
if (!likeTexts.isEmpty()) {
if (!unlikeTexts.isEmpty()) {
// handle items
if (!likeItems.isEmpty()) {
return handleItems(context, mltQuery, likeItems, unlikeItems, include, moreLikeFields, useDefaultField);
} else {
return mltQuery;
MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = new MoreLikeThisQueryBuilder(fields)
.like(likeTexts.toArray(new String[likeTexts.size()]))
.unlike(unlikeTexts.toArray(new String[unlikeTexts.size()]))
.like(likeItems.toArray(new Item[likeItems.size()]))
.unlike(unlikeItems.toArray(new Item[unlikeItems.size()]))
if (stopWords != null) {
return moreLikeThisQueryBuilder;
private static void parseLikeField(QueryParseContext parseContext, List<String> texts, List<Item> items) throws IOException {
@ -260,139 +221,8 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
private static List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
for (Iterator<String> it = moreLikeFields.iterator(); it.hasNext(); ) {
final String fieldName = it.next();
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
if (failOnUnsupportedField) {
throw new IllegalArgumentException("more_like_this doesn't support binary/numeric fields: [" + fieldName + "]");
} else {
return moreLikeFields;
private Query handleItems(QueryShardContext context, MoreLikeThisQuery mltQuery, List<Item> likeItems, List<Item> unlikeItems,
boolean include, List<String> moreLikeFields, boolean useDefaultField) throws IOException {
QueryParseContext parseContext = context.parseContext();
// set default index, type and fields if not specified
for (Item item : likeItems) {
setDefaultIndexTypeFields(parseContext, item, moreLikeFields, useDefaultField);
for (Item item : unlikeItems) {
setDefaultIndexTypeFields(parseContext, item, moreLikeFields, useDefaultField);
// fetching the items with multi-termvectors API
MultiTermVectorsResponse responses = fetchResponse(context.getClient(), likeItems, unlikeItems, SearchContext.current());
// getting the Fields for liked items
mltQuery.setLikeText(getFieldsFor(responses, likeItems));
// getting the Fields for unliked items
if (!unlikeItems.isEmpty()) {
org.apache.lucene.index.Fields[] unlikeFields = getFieldsFor(responses, unlikeItems);
if (unlikeFields.length > 0) {
BooleanQuery boolQuery = new BooleanQuery();
boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
// exclude the items from the search
if (!include) {
handleExclude(boolQuery, likeItems);
return boolQuery;
private static void setDefaultIndexTypeFields(QueryParseContext parseContext, Item item, List<String> moreLikeFields,
boolean useDefaultField) {
if (item.index() == null) {
if (item.type() == null) {
if (parseContext.shardContext().queryTypes().size() > 1) {
throw new QueryParsingException(parseContext,
"ambiguous type for item with id: " + item.id() + " and index: " + item.index());
} else {
// default fields if not present but don't override for artificial docs
if ((item.fields() == null || item.fields().length == 0) && item.doc() == null) {
if (useDefaultField) {
} else {
item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
private static void handleExclude(BooleanQuery boolQuery, List<Item> likeItems) {
// artificial docs get assigned a random id and should be disregarded
List<BytesRef> uids = new ArrayList<>();
for (Item item : likeItems) {
if (item.doc() != null) {
uids.add(createUidAsBytes(item.type(), item.id()));
if (!uids.isEmpty()) {
TermsQuery query = new TermsQuery(UidFieldMapper.NAME, uids.toArray(new BytesRef[0]));
boolQuery.add(query, BooleanClause.Occur.MUST_NOT);
public MoreLikeThisQueryBuilder getBuilderPrototype() {
return MoreLikeThisQueryBuilder.PROTOTYPE;
private MultiTermVectorsResponse fetchResponse(Client client, List<Item> likeItems, @Nullable List<Item> unlikeItems,
SearchContext searchContext) throws IOException {
MultiTermVectorsRequest request = new MultiTermVectorsRequest();
for (Item item : likeItems) {
if (unlikeItems != null) {
for (Item item : unlikeItems) {
return client.multiTermVectors(request).actionGet();
private static Fields[] getFieldsFor(MultiTermVectorsResponse responses, List<Item> items) throws IOException {
List<Fields> likeFields = new ArrayList<>();
Set<Item> selectedItems = new HashSet<>();
for (Item request : items) {
selectedItems.add(new Item(request.index(), request.type(), request.id()));
for (MultiTermVectorsItemResponse response : responses) {
if (!hasResponseFromRequest(response, selectedItems)) {
if (response.isFailed()) {
TermVectorsResponse getResponse = response.getResponse();
if (!getResponse.isExists()) {
return likeFields.toArray(Fields.EMPTY_ARRAY);
private static boolean hasResponseFromRequest(MultiTermVectorsItemResponse response, Set<Item> selectedItems) {
return selectedItems.contains(new Item(response.getIndex(), response.getType(), response.getId()));
@ -20,7 +20,6 @@
package org.elasticsearch.index.query;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.queryparser.classic.QueryParserSettings;
@ -33,21 +32,15 @@ import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.geo.builders.ShapeBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.ContentPath;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperBuilders;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.mapper.object.ObjectMapper;
import org.elasticsearch.index.query.support.NestedScope;
import org.elasticsearch.indices.cache.query.terms.TermsLookup;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptService;
@ -56,11 +49,9 @@ import org.elasticsearch.search.fetch.innerhits.InnerHitsContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -0,0 +1,289 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.termvectors.*;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import org.hamcrest.Matchers;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;
import static org.hamcrest.Matchers.is;
public class MoreLikeThisQueryBuilderTests extends AbstractQueryTestCase<MoreLikeThisQueryBuilder> {
private static String[] randomFields;
private static Item[] randomLikeItems;
private static Item[] randomUnlikeItems;
public void setup() {
// MLT only supports string fields, unsupported fields are tested below
randomFields = randomStringFields();
// we also preset the item requests
randomLikeItems = new Item[randomIntBetween(1, 3)];
for (int i = 0; i < randomLikeItems.length; i++) {
randomLikeItems[i] = generateRandomItem();
// and for the unlike items too
randomUnlikeItems = new Item[randomIntBetween(1, 3)];
for (int i = 0; i < randomUnlikeItems.length; i++) {
randomUnlikeItems[i] = generateRandomItem();
private static String[] randomStringFields() {
String[] mappedStringFields = new String[]{STRING_FIELD_NAME, STRING_FIELD_NAME_2};
String[] unmappedStringFields = generateRandomStringArray(2, 5, false, false);
return Stream.concat(Arrays.stream(mappedStringFields), Arrays.stream(unmappedStringFields)).toArray(String[]::new);
private Item generateRandomItem() {
String index = randomBoolean() ? getIndex().getName() : null;
String type = getRandomType(); // set to one type to avoid ambiguous types
// indexed item or artificial document
Item item;
if (randomBoolean()) {
item = new Item(index, type, randomAsciiOfLength(10));
} else {
item = new Item(index, type, randomArtificialDoc());
// if no field is specified MLT uses all mapped fields for this item
if (randomBoolean()) {
// per field analyzer
if (randomBoolean()) {
if (randomBoolean()) {
if (randomBoolean()) {
if (randomBoolean()) {
return item;
private XContentBuilder randomArtificialDoc() {
XContentBuilder doc;
try {
doc = XContentFactory.jsonBuilder().startObject();
for (String field : randomFields) {
doc.field(field, randomAsciiOfLength(10));
} catch (IOException e) {
throw new ElasticsearchException("Unable to generate random artificial doc!");
return doc;
private Map<String, String> randomPerFieldAnalyzer() {
Map<String, String> perFieldAnalyzer = new HashMap<>();
for (String field : randomFields) {
perFieldAnalyzer.put(field, randomAnalyzer());
return perFieldAnalyzer;
protected MoreLikeThisQueryBuilder doCreateTestQueryBuilder() {
MoreLikeThisQueryBuilder queryBuilder;
if (randomBoolean()) { // for the default field
queryBuilder = new MoreLikeThisQueryBuilder();
} else {
queryBuilder = new MoreLikeThisQueryBuilder(randomFields);
// like field is required
if (randomBoolean()) {
queryBuilder.like(generateRandomStringArray(5, 5, false, false));
} else {
if (randomBoolean()) {
queryBuilder.unlike(generateRandomStringArray(5, 5, false, false));
if (randomBoolean()) {
if (randomBoolean()) {
if (randomBoolean()) {
if (randomBoolean()) {
if (randomBoolean()) {
if (randomBoolean()) {
if (randomBoolean()) {
if (randomBoolean()) {
queryBuilder.stopWords(generateRandomStringArray(5, 5, false, false));
if (randomBoolean()) {
queryBuilder.analyzer(randomAnalyzer()); // fix the analyzer?
if (randomBoolean()) {
if (randomBoolean()) {
queryBuilder.boostTerms(randomFloat() * 10);
if (randomBoolean()) {
if (randomBoolean()) {
return queryBuilder;
protected MultiTermVectorsResponse executeMultiTermVectors(MultiTermVectorsRequest mtvRequest) {
try {
MultiTermVectorsItemResponse[] responses = new MultiTermVectorsItemResponse[mtvRequest.size()];
int i = 0;
for (TermVectorsRequest request : mtvRequest) {
TermVectorsResponse response = new TermVectorsResponse(request.index(), request.type(), request.id());
Fields generatedFields;
if (request.doc() != null) {
generatedFields = generateFields(randomFields, request.doc().toUtf8());
} else {
generatedFields = generateFields(request.selectedFields().toArray(new String[0]), request.id());
EnumSet<TermVectorsRequest.Flag> flags = EnumSet.of(TermVectorsRequest.Flag.Positions, TermVectorsRequest.Flag.Offsets);
response.setFields(generatedFields, request.selectedFields(), flags, generatedFields);
responses[i++] = new MultiTermVectorsItemResponse(response, null);
return new MultiTermVectorsResponse(responses);
} catch (IOException ex) {
throw new ElasticsearchException("boom", ex);
* Here we could go overboard and use a pre-generated indexed random document for a given Item,
* but for now we simply return the id as the content of the document, and we do so for
* every field.
private static Fields generateFields(String[] fieldNames, String text) throws IOException {
MemoryIndex index = new MemoryIndex();
for (String fieldName : fieldNames) {
index.addField(fieldName, text, new WhitespaceAnalyzer());
return MultiFields.getFields(index.createSearcher().getIndexReader());
protected void doAssertLuceneQuery(MoreLikeThisQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {
if (!queryBuilder.likeItems().isEmpty()) {
assertThat(query, Matchers.instanceOf(BooleanQuery.class));
} else {
// we rely on integration tests for a deeper check here
assertThat(query, Matchers.instanceOf(MoreLikeThisQuery.class));
public void testValidate() {
MoreLikeThisQueryBuilder queryBuilder = new MoreLikeThisQueryBuilder(Strings.EMPTY_ARRAY);
assertThat(queryBuilder.validate().validationErrors().size(), is(2));
queryBuilder = new MoreLikeThisQueryBuilder(Strings.EMPTY_ARRAY).like("some text");
assertThat(queryBuilder.validate().validationErrors().size(), is(1));
queryBuilder = new MoreLikeThisQueryBuilder("field").like(Strings.EMPTY_ARRAY);
assertThat(queryBuilder.validate().validationErrors().size(), is(1));
queryBuilder = new MoreLikeThisQueryBuilder("field").like(Item.EMPTY_ARRAY);
assertThat(queryBuilder.validate().validationErrors().size(), is(1));
queryBuilder = new MoreLikeThisQueryBuilder("field").like("some text");
public void testUnsupportedFields() throws IOException {
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
String unsupportedField = randomFrom(INT_FIELD_NAME, DOUBLE_FIELD_NAME, DATE_FIELD_NAME);
MoreLikeThisQueryBuilder queryBuilder = new MoreLikeThisQueryBuilder(unsupportedField)
.like("some text")
try {
fail("should have failed with IllegalArgumentException for field: " + unsupportedField);
} catch (IllegalArgumentException e) {
assertThat(e.getMessage(), Matchers.containsString("more_like_this doesn't support binary/numeric fields"));
public void testItemSerialization() throws IOException {
Item expectedItem = generateRandomItem();
BytesStreamOutput output = new BytesStreamOutput();
Item newItem = Item.readItemFrom(StreamInput.wrap(output.bytes()));
assertEquals(expectedItem, newItem);
public void testItemFromXContent() throws IOException {
Item expectedItem = generateRandomItem();
String json = expectedItem.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS).string();
XContentParser parser = XContentFactory.xContent(json).createParser(json);
Item newItem = Item.parse(parser, ParseFieldMatcher.STRICT, new Item());
assertEquals(expectedItem, newItem);
@ -1,60 +0,0 @@
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.elasticsearch.search.morelikethis;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import org.elasticsearch.test.ESTestCase;
import org.junit.Test;
import java.util.Random;
public class ItemSerializationTests extends ESTestCase {
private Item generateRandomItem(int arraySize, int stringSize) {
String index = randomAsciiOfLength(stringSize);
String type = randomAsciiOfLength(stringSize);
String id = String.valueOf(Math.abs(randomInt()));
String[] fields = generateRandomStringArray(arraySize, stringSize, true);
String routing = randomBoolean() ? randomAsciiOfLength(stringSize) : null;
long version = Math.abs(randomLong());
VersionType versionType = RandomPicks.randomFrom(new Random(), VersionType.values());
return new Item(index, type, id).fields(fields).routing(routing).version(version).versionType(versionType);
public void testItemSerialization() throws Exception {
int numOfTrials = 100;
int maxArraySize = 7;
int maxStringSize = 8;
for (int i = 0; i < numOfTrials; i++) {
Item item1 = generateRandomItem(maxArraySize, maxStringSize);
String json = item1.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS).string();
XContentParser parser = XContentFactory.xContent(json).createParser(json);
Item item2 = Item.parse(parser, ParseFieldMatcher.STRICT, new Item());
assertEquals(item1, item2);
@ -72,7 +72,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
logger.info("Running moreLikeThis");
SearchResponse response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
assertHitCount(response, 1l);
@ -92,7 +92,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
logger.info("Running moreLikeThis");
SearchResponse response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
assertHitCount(response, 0l);
@ -119,24 +119,24 @@ public class MoreLikeThisIT extends ESIntegTestCase {
logger.info("Running moreLikeThis on index");
SearchResponse response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
assertHitCount(response, 2l);
logger.info("Running moreLikeThis on beta shard");
response = client().prepareSearch("beta").setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
assertHitCount(response, 1l);
assertThat(response.getHits().getAt(0).id(), equalTo("3"));
logger.info("Running moreLikeThis on release shard");
response = client().prepareSearch("release").setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
assertHitCount(response, 1l);
assertThat(response.getHits().getAt(0).id(), equalTo("2"));
logger.info("Running moreLikeThis on alias with node client");
response = internalCluster().clientNodeClient().prepareSearch("beta").setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
assertHitCount(response, 1l);
assertThat(response.getHits().getAt(0).id(), equalTo("3"));
@ -156,11 +156,11 @@ public class MoreLikeThisIT extends ESIntegTestCase {
assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));
SearchResponse response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1"))).get();
new MoreLikeThisQueryBuilder().like(new Item("foo", "bar", "1"))).get();
assertThat(response, notNullValue());
response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1"))).get();
new MoreLikeThisQueryBuilder().like(new Item("foo", "bar", "1"))).get();
assertThat(response, notNullValue());
@ -182,7 +182,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
SearchResponse response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1").routing("2"))).get();
new MoreLikeThisQueryBuilder().like(new Item("foo", "bar", "1").routing("2"))).get();
assertThat(response, notNullValue());
@ -205,7 +205,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
SearchResponse response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1").routing("4000"))).get();
new MoreLikeThisQueryBuilder().like(new Item("foo", "bar", "1").routing("4000"))).get();
assertThat(response, notNullValue());
@ -233,12 +233,12 @@ public class MoreLikeThisIT extends ESIntegTestCase {
// Implicit list of fields -> ignore numeric fields
SearchResponse searchResponse = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)).get();
new MoreLikeThisQueryBuilder().like(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)).get();
assertHitCount(searchResponse, 1l);
// Explicit list of fields including numeric fields -> fail
new MoreLikeThisQueryBuilder("string_value", "int_value").addLikeItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class);
new MoreLikeThisQueryBuilder("string_value", "int_value").like(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class);
// mlt query with no field -> OK
searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery().likeText("index").minTermFreq(1).minDocFreq(1)).execute().actionGet();
@ -295,16 +295,16 @@ public class MoreLikeThisIT extends ESIntegTestCase {
logger.info("Running More Like This with include true");
SearchResponse response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
assertOrderedSearchHits(response, "1", "2");
response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "2")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "2")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
assertOrderedSearchHits(response, "2", "1");
logger.info("Running More Like This with include false");
response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%")).get();
new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%")).get();
assertSearchHits(response, "2");
@ -355,7 +355,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
logger.info("Running MoreLikeThis");
MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").include(true).minTermFreq(1).minDocFreq(1)
.addLikeItem(new Item("test", "type0", "0"));
.like(new Item("test", "type0", "0"));
String[] types = new String[numOfTypes];
for (int i = 0; i < numOfTypes; i++) {
@ -573,7 +573,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
docs.add(new Item("test", "type1", i+""));
mltQuery = moreLikeThisQuery()
.like(new Item("test", "type1", doc))
@ -230,7 +230,7 @@ public class ContextAndHeaderTransportIT extends ESIntegTestCase {
transportClient().admin().indices().prepareRefresh(lookupIndex, queryIndex).get();
MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = QueryBuilders.moreLikeThisQuery("name")
.addLikeItem(new Item(lookupIndex, "type", "1"))
.like(new Item(lookupIndex, "type", "1"))
@ -88,3 +88,14 @@ makes the type / path parameter mandatory.
Moving MatchQueryBuilder.Type and MatchQueryBuilder.ZeroTermsQuery enum to MatchQuery.Type.
Also reusing new Operator enum.
==== MoreLikeThisQueryBuilder
Removed `MoreLikeThisQueryBuilder.Item#id(String id)`, `Item#doc(BytesReference doc)`,
`Item#doc(XContentBuilder doc)`. Use the provided constructors instead.
Removed `MoreLikeThisQueryBuilder#addLike` and `addUnlike` in favor of using the `like`
and `unlike` methods.
The deprecated `docs(Item... docs)`, `ignoreLike(Item... docs)`,
`ignoreLike(String... likeText)`, `addItem(Item... likeItems)` have been removed.
Reference in New Issue
Block a user