Refactors MoreLikeThisQueryBuilder and Parser
Relates to #10217. This PR is against the query-refactoring branch. Closes #13486.
This commit is contained in: parent 4096244ec2, commit a13336da54.
@@ -350,6 +350,13 @@ public abstract class StreamInput extends InputStream {
         return ret;
     }
 
+    public String[] readOptionalStringArray() throws IOException {
+        if (readBoolean()) {
+            return readStringArray();
+        }
+        return null;
+    }
+
     @Nullable
     @SuppressWarnings("unchecked")
     public Map<String, Object> readMap() throws IOException {
@@ -316,6 +316,18 @@ public abstract class StreamOutput extends OutputStream {
         }
     }
 
+    /**
+     * Writes a string array; for a null array, writes false.
+     */
+    public void writeOptionalStringArray(@Nullable String[] array) throws IOException {
+        if (array == null) {
+            writeBoolean(false);
+        } else {
+            writeBoolean(true);
+            writeStringArray(array);
+        }
+    }
+
     public void writeMap(@Nullable Map<String, Object> map) throws IOException {
         writeGenericValue(map);
     }
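The new optional-array methods follow the null-flag convention the stream classes already use for optional strings: a single boolean on the wire says whether a payload follows. A minimal standalone sketch of that wire pattern, written against plain java.io streams rather than the Elasticsearch Stream* classes, purely for illustration:

    import java.io.*;

    class OptionalArrayWire {
        // Mirrors writeOptionalStringArray: a presence flag, then the payload if present.
        static void writeOptional(DataOutputStream out, String[] array) throws IOException {
            out.writeBoolean(array != null);
            if (array != null) {
                out.writeInt(array.length);
                for (String s : array) {
                    out.writeUTF(s);
                }
            }
        }

        // Mirrors readOptionalStringArray: read the flag, then the payload or null.
        static String[] readOptional(DataInputStream in) throws IOException {
            if (!in.readBoolean()) {
                return null;
            }
            String[] array = new String[in.readInt()];
            for (int i = 0; i < array.length; i++) {
                array[i] = in.readUTF();
            }
            return array;
        }
    }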
@@ -18,12 +18,17 @@
  */
 package org.elasticsearch.index;
 
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.lucene.uid.Versions;
 
+import java.io.IOException;
+
 /**
  *
  */
-public enum VersionType {
+public enum VersionType implements Writeable<VersionType> {
     INTERNAL((byte) 0) {
         @Override
         public boolean isVersionConflictForWrites(long currentVersion, long expectedVersion) {
@@ -219,6 +224,8 @@ public enum VersionType {
 
     private final byte value;
 
+    private static final VersionType PROTOTYPE = INTERNAL;
+
     VersionType(byte value) {
         this.value = value;
     }
@@ -304,4 +311,20 @@ public enum VersionType {
         }
         throw new IllegalArgumentException("No version type match [" + value + "]");
     }
+
+    @Override
+    public VersionType readFrom(StreamInput in) throws IOException {
+        int ordinal = in.readVInt();
+        assert (ordinal == 0 || ordinal == 1 || ordinal == 2 || ordinal == 3);
+        return VersionType.values()[ordinal];
+    }
+
+    public static VersionType readVersionTypeFrom(StreamInput in) throws IOException {
+        return PROTOTYPE.readFrom(in);
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        out.writeVInt(ordinal());
+    }
 }
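VersionType now serializes itself by ordinal, and the static PROTOTYPE constant gives callers a readFrom entry point without first having a value in hand, the same prototype pattern the query-refactoring branch uses for query builders. A standalone sketch of the ordinal round trip and its main hazard (reordering the constants silently changes the wire format), using an illustrative enum and plain java.io for clarity:

    import java.io.*;

    // Illustrative enum only; VersionType follows the same ordinal-on-the-wire scheme.
    enum WireEnum {
        INTERNAL, EXTERNAL;

        void writeTo(DataOutputStream out) throws IOException {
            out.writeInt(ordinal()); // VersionType uses writeVInt instead
        }

        static WireEnum readFrom(DataInputStream in) throws IOException {
            int ordinal = in.readInt();
            // values()[ordinal] is only stable while declaration order is frozen;
            // hence the assert on the expected ordinal range in VersionType.readFrom.
            return WireEnum.values()[ordinal];
        }
    }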
@@ -19,25 +19,40 @@
 
 package org.elasticsearch.index.query;
 
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.queries.TermsQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.ExceptionsHelper;
-import org.elasticsearch.action.termvectors.TermVectorsRequest;
+import org.elasticsearch.action.termvectors.*;
+import org.elasticsearch.client.Client;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParseFieldMatcher;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
 import org.elasticsearch.common.lucene.search.XMoreLikeThis;
 import org.elasticsearch.common.lucene.uid.Versions;
-import org.elasticsearch.common.xcontent.ToXContent;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.XContentFactory;
-import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.common.xcontent.*;
+import org.elasticsearch.index.VersionType;
+import org.elasticsearch.index.analysis.Analysis;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.internal.UidFieldMapper;
+import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
 import java.util.*;
 
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.elasticsearch.index.mapper.Uid.createUidAsBytes;
 
 /**
  * A more like this query that finds documents that are "like" the provided set of document(s).
@@ -46,10 +61,50 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
  */
 public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
 
+    public static final String NAME = "mlt";
+
+    public static final int DEFAULT_MAX_QUERY_TERMS = XMoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
+    public static final int DEFAULT_MIN_TERM_FREQ = XMoreLikeThis.DEFAULT_MIN_TERM_FREQ;
+    public static final int DEFAULT_MIN_DOC_FREQ = XMoreLikeThis.DEFAULT_MIN_DOC_FREQ;
+    public static final int DEFAULT_MAX_DOC_FREQ = XMoreLikeThis.DEFAULT_MAX_DOC_FREQ;
+    public static final int DEFAULT_MIN_WORD_LENGTH = XMoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
+    public static final int DEFAULT_MAX_WORD_LENGTH = XMoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
+    public static final String DEFAULT_MINIMUM_SHOULD_MATCH = MoreLikeThisQuery.DEFAULT_MINIMUM_SHOULD_MATCH;
+    public static final float DEFAULT_BOOST_TERMS = 0; // no boost terms
+    public static final boolean DEFAULT_INCLUDE = false;
+    public static final boolean DEFAULT_FAIL_ON_UNSUPPORTED_FIELDS = true;
+
+    // document inputs
+    private final List<String> fields;
+    private List<String> likeTexts = new ArrayList<>();
+    private List<String> unlikeTexts = new ArrayList<>();
+    private List<Item> likeItems = new ArrayList<>();
+    private List<Item> unlikeItems = new ArrayList<>();
+
+    // term selection parameters
+    private int maxQueryTerms = DEFAULT_MAX_QUERY_TERMS;
+    private int minTermFreq = DEFAULT_MIN_TERM_FREQ;
+    private int minDocFreq = DEFAULT_MIN_DOC_FREQ;
+    private int maxDocFreq = DEFAULT_MAX_DOC_FREQ;
+    private int minWordLength = DEFAULT_MIN_WORD_LENGTH;
+    private int maxWordLength = DEFAULT_MAX_WORD_LENGTH;
+    private String[] stopWords;
+    private String analyzer;
+
+    // query formation parameters
+    private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
+    private float boostTerms = DEFAULT_BOOST_TERMS;
+    private boolean include = DEFAULT_INCLUDE;
+
+    // other parameters
+    private boolean failOnUnsupportedField = DEFAULT_FAIL_ON_UNSUPPORTED_FIELDS;
+
+    static final MoreLikeThisQueryBuilder PROTOTYPE = new MoreLikeThisQueryBuilder();
+
     /**
      * A single item to be used for a {@link MoreLikeThisQueryBuilder}.
      */
-    public static final class Item implements ToXContent {
+    public static final class Item implements ToXContent, Writeable<Item> {
         public static final Item[] EMPTY_ARRAY = new Item[0];
 
         public interface Field {
@@ -74,6 +129,8 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         private long version = Versions.MATCH_ANY;
         private VersionType versionType = VersionType.INTERNAL;
 
+        static final Item PROTOTYPE = new Item();
+
         public Item() {
 
         }
@@ -85,7 +142,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
          * @param type the type of the document
          * @param id and its id
          */
-        public Item(String index, @Nullable String type, String id) {
+        public Item(@Nullable String index, @Nullable String type, String id) {
+            if (id == null) {
+                throw new IllegalArgumentException("Item requires id to be non-null");
+            }
             this.index = index;
             this.type = type;
             this.id = id;
@@ -98,10 +158,13 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
          * @param type the type to be used for parsing the doc
          * @param doc the document specification
          */
-        public Item(String index, String type, XContentBuilder doc) {
+        public Item(@Nullable String index, @Nullable String type, XContentBuilder doc) {
+            if (doc == null) {
+                throw new IllegalArgumentException("Item requires doc to be non-null");
+            }
             this.index = index;
             this.type = type;
-            this.doc(doc);
+            this.doc = doc.bytes();
         }
 
         public String index() {
@@ -126,30 +189,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
             return id;
         }
 
-        public Item id(String id) {
-            this.id = id;
-            return this;
-        }
-
         public BytesReference doc() {
             return doc;
         }
 
-        /**
-         * Sets to a given artificial document, that is a document that is not present in the index.
-         */
-        public Item doc(BytesReference doc) {
-            this.doc = doc;
-            return this;
-        }
-
-        /**
-         * Sets to a given artificial document, that is a document that is not present in the index.
-         */
-        public Item doc(XContentBuilder doc) {
-            return this.doc(doc.bytes());
-        }
-
         public String[] fields() {
             return fields;
         }
@@ -217,7 +260,7 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
             // for artificial docs to make sure that the id has changed in the item too
             if (doc != null) {
                 termVectorsRequest.doc(doc, true);
-                this.id(termVectorsRequest.id());
+                this.id = termVectorsRequest.id();
             }
             return termVectorsRequest;
         }
@@ -239,7 +282,7 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
                 } else if (parseFieldMatcher.match(currentFieldName, Field.ID)) {
                     item.id = parser.text();
                 } else if (parseFieldMatcher.match(currentFieldName, Field.DOC)) {
-                    item.doc(jsonBuilder().copyCurrentStructure(parser));
+                    item.doc = jsonBuilder().copyCurrentStructure(parser).bytes();
                 } else if (parseFieldMatcher.match(currentFieldName, Field.FIELDS)) {
                     if (token == XContentParser.Token.START_ARRAY) {
                         List<String> fields = new ArrayList<>();
@@ -270,6 +313,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
                 throw new ElasticsearchParseException(
                         "failed to parse More Like This item. either [id] or [doc] can be specified, but not both!");
             }
+            if (item.id == null && item.doc == null) {
+                throw new ElasticsearchParseException(
+                        "failed to parse More Like This item. neither [id] nor [doc] is specified!");
+            }
             return item;
         }
 
@@ -282,7 +329,7 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
             if (this.type != null) {
                 builder.field(Field.TYPE.getPreferredName(), this.type);
             }
-            if (this.id != null && this.doc == null) {
+            if (this.id != null) {
                 builder.field(Field.ID.getPreferredName(), this.id);
             }
             if (this.doc != null) {
@@ -326,6 +373,45 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
             }
         }
 
+        @Override
+        public Item readFrom(StreamInput in) throws IOException {
+            Item item = new Item();
+            item.index = in.readOptionalString();
+            item.type = in.readOptionalString();
+            if (in.readBoolean()) {
+                item.doc = (BytesReference) in.readGenericValue();
+            } else {
+                item.id = in.readString();
+            }
+            item.fields = in.readOptionalStringArray();
+            item.perFieldAnalyzer = (Map<String, String>) in.readGenericValue();
+            item.routing = in.readOptionalString();
+            item.version = in.readLong();
+            item.versionType = VersionType.readVersionTypeFrom(in);
+            return item;
+        }
+
+        public static Item readItemFrom(StreamInput in) throws IOException {
+            return PROTOTYPE.readFrom(in);
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            out.writeOptionalString(index);
+            out.writeOptionalString(type);
+            out.writeBoolean(doc != null);
+            if (doc != null) {
+                out.writeGenericValue(doc);
+            } else {
+                out.writeString(id);
+            }
+            out.writeOptionalStringArray(fields);
+            out.writeGenericValue(perFieldAnalyzer);
+            out.writeOptionalString(routing);
+            out.writeLong(version);
+            versionType.writeTo(out);
+        }
+
         @Override
         public int hashCode() {
             return Objects.hash(index, type, id, doc, Arrays.hashCode(fields), perFieldAnalyzer, routing,
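Item's writeTo/readFrom pair above uses one boolean to discriminate the two mutually exclusive identities an item can have, an artificial document (doc) or an indexed one (id); everything else travels as optional values. A hedged round-trip sketch, where BytesStreamOutput and StreamInput.wrap are assumed to be available the way they are in the project's other stream round-trip tests:

    // Illustrative only: serialize an Item and read it back through the prototype.
    BytesStreamOutput out = new BytesStreamOutput();  // assumed test helper
    Item original = new Item("my-index", "my-type", "1");
    original.writeTo(out);

    StreamInput in = StreamInput.wrap(out.bytes());   // assumed wrap() helper
    Item copy = Item.readItemFrom(in);
    // id was non-null, so writeTo wrote false for the doc flag plus the id string,
    // and readFrom took the readString() branch, leaving copy.doc() null.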
@@ -349,36 +435,6 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         }
     }
 
-    public static final String NAME = "mlt";
-
-    // document inputs
-    private List<String> likeTexts = new ArrayList<>();
-    private List<String> unlikeTexts = new ArrayList<>();
-    private List<Item> likeItems = new ArrayList<>();
-    private List<Item> unlikeItems = new ArrayList<>();
-    private final String[] fields;
-
-    // term selection parameters
-    private int maxQueryTerms = -1;
-    private int minTermFreq = -1;
-    private int minDocFreq = -1;
-    private int maxDocFreq = -1;
-    private int minWordLength = -1;
-    private int maxWordLength = -1;
-    private String[] stopWords = null;
-    private String analyzer;
-
-    // query formation parameters
-    private String minimumShouldMatch = null;
-    private float boostTerms = -1;
-    private Boolean include = null;
-
-    // other parameters
-    private Boolean failOnUnsupportedField;
-
-    static final MoreLikeThisQueryBuilder PROTOTYPE = new MoreLikeThisQueryBuilder();
-
-
     /**
      * Constructs a new more like this query which uses the "_all" field.
      */
@@ -392,17 +448,34 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
      * @param fields the field names that will be used when generating the 'More Like This' query.
      */
     public MoreLikeThisQueryBuilder(String... fields) {
+        this(Collections.unmodifiableList(Arrays.asList(fields)));
+    }
+
+    /**
+     * Sets the field names that will be used when generating the 'More Like This' query.
+     *
+     * @param fields the field names that will be used when generating the 'More Like This' query.
+     */
+    public MoreLikeThisQueryBuilder(List<String> fields) {
         this.fields = fields;
     }
 
+    public List<String> fields() {
+        return fields;
+    }
+
     /**
      * Sets the text to use in order to find documents that are "like" this.
      *
     * @param likeTexts the text to use when generating the 'More Like This' query.
      */
     public MoreLikeThisQueryBuilder like(String... likeTexts) {
-        this.likeTexts = new ArrayList<>();
-        return addLikeText(likeTexts);
+        this.likeTexts = Collections.unmodifiableList(Arrays.asList(likeTexts));
+        return this;
     }
 
+    public List<String> likeTexts() {
+        return likeTexts;
+    }
+
     /**
@@ -411,56 +484,36 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
      * @param likeItems the documents to use when generating the 'More Like This' query.
      */
     public MoreLikeThisQueryBuilder like(Item... likeItems) {
-        this.likeItems = new ArrayList<>();
-        return addLikeItem(likeItems);
-    }
-
-    /**
-     * Adds some text to use in order to find documents that are "like" this.
-     */
-    public MoreLikeThisQueryBuilder addLikeText(String... likeTexts) {
-        Collections.addAll(this.likeTexts, likeTexts);
+        this.likeItems = Collections.unmodifiableList(Arrays.asList(likeItems));
         return this;
     }
 
-    /**
-     * Adds a document to use in order to find documents that are "like" this.
-     */
-    public MoreLikeThisQueryBuilder addLikeItem(Item... likeItems) {
-        Collections.addAll(this.likeItems, likeItems);
-        return this;
+    public List<Item> likeItems() {
+        return likeItems;
     }
 
     /**
      * Sets the text from which the terms should not be selected from.
      */
     public MoreLikeThisQueryBuilder unlike(String... unlikeTexts) {
-        this.unlikeTexts = new ArrayList<>();
-        return addUnlikeText(unlikeTexts);
+        this.unlikeTexts = Collections.unmodifiableList(Arrays.asList(unlikeTexts));
+        return this;
     }
 
+    public List<String> unlikeTexts() {
+        return unlikeTexts;
+    }
+
     /**
      * Sets the documents from which the terms should not be selected from.
      */
     public MoreLikeThisQueryBuilder unlike(Item... unlikeItems) {
-        this.unlikeItems = new ArrayList<>();
-        return addUnlikeItem(unlikeItems);
-    }
-
-    /**
-     * Adds some text to use in order to find documents that are "unlike" this.
-     */
-    public MoreLikeThisQueryBuilder addUnlikeText(String... unlikeTexts) {
-        Collections.addAll(this.unlikeTexts, unlikeTexts);
+        this.unlikeItems = Collections.unmodifiableList(Arrays.asList(unlikeItems));
         return this;
     }
 
-    /**
-     * Adds a document to use in order to find documents that are "unlike" this.
-     */
-    public MoreLikeThisQueryBuilder addUnlikeItem(Item... unlikeItems) {
-        Collections.addAll(this.unlikeItems, unlikeItems);
-        return this;
+    public List<Item> unlikeItems() {
+        return unlikeItems;
     }
 
     /**
@@ -472,6 +525,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         return this;
     }
 
+    public int maxQueryTerms() {
+        return maxQueryTerms;
+    }
+
     /**
      * The frequency below which terms will be ignored in the source doc. The default
      * frequency is <tt>2</tt>.
@@ -481,6 +538,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         return this;
     }
 
+    public int minTermFreq() {
+        return minTermFreq;
+    }
+
     /**
      * Sets the frequency at which words will be ignored which do not occur in at least this
      * many docs. Defaults to <tt>5</tt>.
@@ -490,6 +551,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         return this;
     }
 
+    public int minDocFreq() {
+        return minDocFreq;
+    }
+
     /**
      * Set the maximum frequency in which words may still appear. Words that appear
      * in more than this many docs will be ignored. Defaults to unbounded.
@@ -499,6 +564,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         return this;
     }
 
+    public int maxDocFreq() {
+        return maxDocFreq;
+    }
+
     /**
      * Sets the minimum word length below which words will be ignored. Defaults
      * to <tt>0</tt>.
@@ -508,6 +577,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         return this;
     }
 
+    public int minWordLength() {
+        return minWordLength;
+    }
+
     /**
      * Sets the maximum word length above which words will be ignored. Defaults to
      * unbounded (<tt>0</tt>).
@@ -517,6 +590,34 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         return this;
     }
 
+    public int maxWordLength() {
+        return maxWordLength;
+    }
+
     /**
      * Set the set of stopwords.
      * <p/>
      * <p>Any word in this set is considered "uninteresting" and ignored. Even if your Analyzer allows stopwords, you
      * might want to tell the MoreLikeThis code to ignore them, as for the purposes of document similarity it seems
      * reasonable to assume that "a stop word is never interesting".
      */
     public MoreLikeThisQueryBuilder stopWords(String... stopWords) {
         this.stopWords = stopWords;
         return this;
     }
 
+    public MoreLikeThisQueryBuilder stopWords(List<String> stopWords) {
+        if (stopWords == null) {
+            throw new IllegalArgumentException("requires stopwords to be non-null");
+        }
+        this.stopWords = stopWords.toArray(new String[stopWords.size()]);
+        return this;
+    }
+
+    public String[] stopWords() {
+        return stopWords;
+    }
+
     /**
      * The analyzer that will be used to analyze the text. Defaults to the analyzer associated with the field.
      */
@@ -525,6 +626,10 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         return this;
     }
 
+    public String analyzer() {
+        return analyzer;
+    }
+
     /**
      * Number of terms that must match the generated query expressed in the
      * common syntax for minimum should match. Defaults to <tt>30%</tt>.
@@ -532,18 +637,29 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
      * @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
      */
     public MoreLikeThisQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
+        if (minimumShouldMatch == null) {
+            throw new IllegalArgumentException("[" + NAME + "] requires minimum should match to be non-null");
+        }
         this.minimumShouldMatch = minimumShouldMatch;
         return this;
     }
 
+    public String minimumShouldMatch() {
+        return minimumShouldMatch;
+    }
+
     /**
-     * Sets the boost factor to use when boosting terms. Defaults to <tt>1</tt>.
+     * Sets the boost factor to use when boosting terms. Defaults to <tt>0</tt> (deactivated).
      */
     public MoreLikeThisQueryBuilder boostTerms(float boostTerms) {
         this.boostTerms = boostTerms;
         return this;
     }
 
+    public float boostTerms() {
+        return boostTerms;
+    }
+
     /**
      * Whether to include the input documents. Defaults to <tt>false</tt>
      */
@@ -552,14 +668,22 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         return this;
     }
 
+    public boolean include() {
+        return include;
+    }
+
     /**
      * Whether to fail or return no result when this query is run against a field which is not supported such as binary/numeric fields.
      */
     public MoreLikeThisQueryBuilder failOnUnsupportedField(boolean fail) {
-        failOnUnsupportedField = fail;
+        this.failOnUnsupportedField = fail;
         return this;
     }
 
+    public boolean failOnUnsupportedField() {
+        return failOnUnsupportedField;
+    }
+
     /**
      * The text to use in order to find documents that are "like" this.
      */
@@ -577,89 +701,32 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
         return like(items);
     }
 
-    @Deprecated
-    public MoreLikeThisQueryBuilder docs(Item... docs) {
-        return like(docs);
-    }
-
-    /**
-     * Sets the documents from which the terms should not be selected from.
-     *
-     * @Deprecated Use {@link #unlike(Item...)} instead
-     */
-    @Deprecated
-    public MoreLikeThisQueryBuilder ignoreLike(Item... docs) {
-        return unlike(docs);
-    }
-
-    /**
-     * Sets the text from which the terms should not be selected from.
-     *
-     * @Deprecated Use {@link #unlike(String...)} instead.
-     */
-    @Deprecated
-    public MoreLikeThisQueryBuilder ignoreLike(String... likeText) {
-        return unlike(likeText);
-    }
-
-    /**
-     * Adds a document to use in order to find documents that are "like" this.
-     */
-    @Deprecated
-    public MoreLikeThisQueryBuilder addItem(Item... likeItems) {
-        return addLikeItem(likeItems);
-    }
-
     @Override
     protected void doXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject(NAME);
         if (fields != null) {
             builder.field(MoreLikeThisQueryParser.Field.FIELDS.getPreferredName(), fields);
         }
-        if (this.likeTexts.isEmpty() && this.likeItems.isEmpty()) {
-            throw new IllegalArgumentException("more_like_this requires '" + MoreLikeThisQueryParser.Field.LIKE.getPreferredName() + "' to be provided");
-        } else {
-            buildLikeField(builder, MoreLikeThisQueryParser.Field.LIKE.getPreferredName(), likeTexts, likeItems);
-        }
+        buildLikeField(builder, MoreLikeThisQueryParser.Field.LIKE.getPreferredName(), likeTexts, likeItems);
         if (!unlikeTexts.isEmpty() || !unlikeItems.isEmpty()) {
             buildLikeField(builder, MoreLikeThisQueryParser.Field.UNLIKE.getPreferredName(), unlikeTexts, unlikeItems);
         }
-        if (maxQueryTerms != -1) {
-            builder.field(MoreLikeThisQueryParser.Field.MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms);
-        }
-        if (minTermFreq != -1) {
-            builder.field(MoreLikeThisQueryParser.Field.MIN_TERM_FREQ.getPreferredName(), minTermFreq);
-        }
-        if (minDocFreq != -1) {
-            builder.field(MoreLikeThisQueryParser.Field.MIN_DOC_FREQ.getPreferredName(), minDocFreq);
-        }
-        if (maxDocFreq != -1) {
-            builder.field(MoreLikeThisQueryParser.Field.MAX_DOC_FREQ.getPreferredName(), maxDocFreq);
-        }
-        if (minWordLength != -1) {
-            builder.field(MoreLikeThisQueryParser.Field.MIN_WORD_LENGTH.getPreferredName(), minWordLength);
-        }
-        if (maxWordLength != -1) {
-            builder.field(MoreLikeThisQueryParser.Field.MAX_WORD_LENGTH.getPreferredName(), maxWordLength);
-        }
-        if (stopWords != null && stopWords.length > 0) {
+        builder.field(MoreLikeThisQueryParser.Field.MAX_QUERY_TERMS.getPreferredName(), maxQueryTerms);
+        builder.field(MoreLikeThisQueryParser.Field.MIN_TERM_FREQ.getPreferredName(), minTermFreq);
+        builder.field(MoreLikeThisQueryParser.Field.MIN_DOC_FREQ.getPreferredName(), minDocFreq);
+        builder.field(MoreLikeThisQueryParser.Field.MAX_DOC_FREQ.getPreferredName(), maxDocFreq);
+        builder.field(MoreLikeThisQueryParser.Field.MIN_WORD_LENGTH.getPreferredName(), minWordLength);
+        builder.field(MoreLikeThisQueryParser.Field.MAX_WORD_LENGTH.getPreferredName(), maxWordLength);
+        if (stopWords != null) {
            builder.field(MoreLikeThisQueryParser.Field.STOP_WORDS.getPreferredName(), stopWords);
         }
         if (analyzer != null) {
             builder.field(MoreLikeThisQueryParser.Field.ANALYZER.getPreferredName(), analyzer);
         }
-        if (minimumShouldMatch != null) {
-            builder.field(MoreLikeThisQueryParser.Field.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
-        }
-        if (boostTerms != -1) {
-            builder.field(MoreLikeThisQueryParser.Field.BOOST_TERMS.getPreferredName(), boostTerms);
-        }
-        if (include != null) {
-            builder.field(MoreLikeThisQueryParser.Field.INCLUDE.getPreferredName(), include);
-        }
-        if (failOnUnsupportedField != null) {
-            builder.field(MoreLikeThisQueryParser.Field.FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField);
-        }
+        builder.field(MoreLikeThisQueryParser.Field.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
+        builder.field(MoreLikeThisQueryParser.Field.BOOST_TERMS.getPreferredName(), boostTerms);
+        builder.field(MoreLikeThisQueryParser.Field.INCLUDE.getPreferredName(), include);
+        builder.field(MoreLikeThisQueryParser.Field.FAIL_ON_UNSUPPORTED_FIELD.getPreferredName(), failOnUnsupportedField);
         printBoostAndQueryName(builder);
         builder.endObject();
     }
@@ -679,4 +746,299 @@ public class MoreLikeThisQueryBuilder extends AbstractQueryBuilder<MoreLikeThisQueryBuilder> {
     public String getWriteableName() {
         return NAME;
     }
+
+    @Override
+    protected Query doToQuery(QueryShardContext context) throws IOException {
+        MoreLikeThisQuery mltQuery = new MoreLikeThisQuery();
+
+        // set similarity
+        mltQuery.setSimilarity(context.searchSimilarity());
+
+        // set query parameters
+        mltQuery.setMaxQueryTerms(maxQueryTerms);
+        mltQuery.setMinTermFrequency(minTermFreq);
+        mltQuery.setMinDocFreq(minDocFreq);
+        mltQuery.setMaxDocFreq(maxDocFreq);
+        mltQuery.setMinWordLen(minWordLength);
+        mltQuery.setMaxWordLen(maxWordLength);
+        mltQuery.setMinimumShouldMatch(minimumShouldMatch);
+        if (stopWords != null) {
+            mltQuery.setStopWords(new HashSet<>(Arrays.asList(stopWords)));
+        }
+
+        // sets boost terms
+        if (boostTerms != 0) {
+            mltQuery.setBoostTerms(true);
+            mltQuery.setBoostTermsFactor(boostTerms);
+        }
+
+        // set analyzer
+        Analyzer analyzerObj = context.analysisService().analyzer(analyzer);
+        if (analyzerObj == null) {
+            analyzerObj = context.mapperService().searchAnalyzer();
+        }
+        mltQuery.setAnalyzer(analyzerObj);
+
+        // set like text fields
+        boolean useDefaultField = (fields == null);
+        List<String> moreLikeFields = new ArrayList<>();
+        if (useDefaultField) {
+            moreLikeFields = Collections.singletonList(context.defaultField());
+        } else {
+            for (String field : fields) {
+                MappedFieldType fieldType = context.fieldMapper(field);
+                moreLikeFields.add(fieldType == null ? field : fieldType.names().indexName());
+            }
+        }
+
+        // possibly remove unsupported fields
+        removeUnsupportedFields(moreLikeFields, analyzerObj, failOnUnsupportedField);
+        if (moreLikeFields.isEmpty()) {
+            return null;
+        }
+        mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
+
+        // handle like texts
+        if (likeTexts.isEmpty() == false) {
+            mltQuery.setLikeText(likeTexts);
+        }
+        if (unlikeTexts.isEmpty() == false) {
+            mltQuery.setUnlikeText(unlikeTexts);
+        }
+
+        // handle items
+        if (likeItems.isEmpty() == false) {
+            return handleItems(context, mltQuery, likeItems, unlikeItems, include, moreLikeFields, useDefaultField);
+        } else {
+            return mltQuery;
+        }
+    }
+
+    private static List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
+        for (Iterator<String> it = moreLikeFields.iterator(); it.hasNext(); ) {
+            final String fieldName = it.next();
+            if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
+                if (failOnUnsupportedField) {
+                    throw new IllegalArgumentException("more_like_this doesn't support binary/numeric fields: [" + fieldName + "]");
+                } else {
+                    it.remove();
+                }
+            }
+        }
+        return moreLikeFields;
+    }
+
+    private Query handleItems(QueryShardContext context, MoreLikeThisQuery mltQuery, List<Item> likeItems, List<Item> unlikeItems,
+                              boolean include, List<String> moreLikeFields, boolean useDefaultField) throws IOException {
+        // set default index, type and fields if not specified
+        for (Item item : likeItems) {
+            setDefaultIndexTypeFields(context, item, moreLikeFields, useDefaultField);
+        }
+        for (Item item : unlikeItems) {
+            setDefaultIndexTypeFields(context, item, moreLikeFields, useDefaultField);
+        }
+
+        // fetching the items with multi-termvectors API
+        MultiTermVectorsResponse responses = fetchResponse(context.getClient(), likeItems, unlikeItems, SearchContext.current());
+
+        // getting the Fields for liked items
+        mltQuery.setLikeText(getFieldsFor(responses, likeItems));
+
+        // getting the Fields for unliked items
+        if (!unlikeItems.isEmpty()) {
+            org.apache.lucene.index.Fields[] unlikeFields = getFieldsFor(responses, unlikeItems);
+            if (unlikeFields.length > 0) {
+                mltQuery.setUnlikeText(unlikeFields);
+            }
+        }
+
+        BooleanQuery boolQuery = new BooleanQuery();
+        boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
+
+        // exclude the items from the search
+        if (!include) {
+            handleExclude(boolQuery, likeItems);
+        }
+        return boolQuery;
+    }
+
+    private static void setDefaultIndexTypeFields(QueryShardContext context, Item item, List<String> moreLikeFields,
+                                                  boolean useDefaultField) {
+        if (item.index() == null) {
+            item.index(context.index().name());
+        }
+        if (item.type() == null) {
+            if (context.queryTypes().size() > 1) {
+                throw new QueryShardException(context,
+                        "ambiguous type for item with id: " + item.id() + " and index: " + item.index());
+            } else {
+                item.type(context.queryTypes().iterator().next());
+            }
+        }
+        // default fields if not present but don't override for artificial docs
+        if ((item.fields() == null || item.fields().length == 0) && item.doc() == null) {
+            if (useDefaultField) {
+                item.fields("*");
+            } else {
+                item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
+            }
+        }
+    }
+
+    private MultiTermVectorsResponse fetchResponse(Client client, List<Item> likeItems, @Nullable List<Item> unlikeItems,
+                                                   SearchContext searchContext) throws IOException {
+        MultiTermVectorsRequest request = new MultiTermVectorsRequest();
+        for (Item item : likeItems) {
+            request.add(item.toTermVectorsRequest());
+        }
+        if (unlikeItems != null) {
+            for (Item item : unlikeItems) {
+                request.add(item.toTermVectorsRequest());
+            }
+        }
+        request.copyContextAndHeadersFrom(searchContext);
+        return client.multiTermVectors(request).actionGet();
+    }
+
+    private static Fields[] getFieldsFor(MultiTermVectorsResponse responses, List<Item> items) throws IOException {
+        List<Fields> likeFields = new ArrayList<>();
+
+        Set<Item> selectedItems = new HashSet<>();
+        for (Item request : items) {
+            selectedItems.add(new Item(request.index(), request.type(), request.id()));
+        }
+
+        for (MultiTermVectorsItemResponse response : responses) {
+            if (!hasResponseFromRequest(response, selectedItems)) {
+                continue;
+            }
+            if (response.isFailed()) {
+                continue;
+            }
+            TermVectorsResponse getResponse = response.getResponse();
+            if (!getResponse.isExists()) {
+                continue;
+            }
+            likeFields.add(getResponse.getFields());
+        }
+        return likeFields.toArray(Fields.EMPTY_ARRAY);
+    }
+
+    private static boolean hasResponseFromRequest(MultiTermVectorsItemResponse response, Set<Item> selectedItems) {
+        return selectedItems.contains(new Item(response.getIndex(), response.getType(), response.getId()));
+    }
+
+    private static void handleExclude(BooleanQuery boolQuery, List<Item> likeItems) {
+        // artificial docs get assigned a random id and should be disregarded
+        List<BytesRef> uids = new ArrayList<>();
+        for (Item item : likeItems) {
+            if (item.doc() != null) {
+                continue;
+            }
+            uids.add(createUidAsBytes(item.type(), item.id()));
+        }
+        if (!uids.isEmpty()) {
+            TermsQuery query = new TermsQuery(UidFieldMapper.NAME, uids.toArray(new BytesRef[0]));
+            boolQuery.add(query, BooleanClause.Occur.MUST_NOT);
+        }
+    }
+
+    @Override
+    public QueryValidationException validate() {
+        QueryValidationException validationException = null;
+        if (likeTexts.isEmpty() && likeItems.isEmpty()) {
+            validationException = addValidationError("requires 'like' to be specified.", validationException);
+        }
+        if (fields != null && fields.isEmpty()) {
+            validationException = addValidationError("requires 'fields' to be specified", validationException);
+        }
+        return validationException;
+    }
+
+    @Override
+    protected MoreLikeThisQueryBuilder doReadFrom(StreamInput in) throws IOException {
+        MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = new MoreLikeThisQueryBuilder((List<String>) in.readGenericValue());
+        moreLikeThisQueryBuilder.likeTexts = (List<String>) in.readGenericValue();
+        moreLikeThisQueryBuilder.unlikeTexts = (List<String>) in.readGenericValue();
+        moreLikeThisQueryBuilder.likeItems = readItems(in);
+        moreLikeThisQueryBuilder.unlikeItems = readItems(in);
+        moreLikeThisQueryBuilder.maxQueryTerms = in.readVInt();
+        moreLikeThisQueryBuilder.minTermFreq = in.readVInt();
+        moreLikeThisQueryBuilder.minDocFreq = in.readVInt();
+        moreLikeThisQueryBuilder.maxDocFreq = in.readVInt();
+        moreLikeThisQueryBuilder.minWordLength = in.readVInt();
+        moreLikeThisQueryBuilder.maxWordLength = in.readVInt();
+        moreLikeThisQueryBuilder.stopWords = in.readOptionalStringArray();
+        moreLikeThisQueryBuilder.analyzer = in.readOptionalString();
+        moreLikeThisQueryBuilder.minimumShouldMatch = in.readString();
+        moreLikeThisQueryBuilder.boostTerms = (Float) in.readGenericValue();
+        moreLikeThisQueryBuilder.include = in.readBoolean();
+        moreLikeThisQueryBuilder.failOnUnsupportedField = in.readBoolean();
+        return moreLikeThisQueryBuilder;
+    }
+
+    private static List<Item> readItems(StreamInput in) throws IOException {
+        List<Item> items = new ArrayList<>();
+        int size = in.readVInt();
+        for (int i = 0; i < size; i++) {
+            items.add(Item.readItemFrom(in));
+        }
+        return items;
+    }
+
+    @Override
+    protected void doWriteTo(StreamOutput out) throws IOException {
+        out.writeGenericValue(fields);
+        out.writeGenericValue(likeTexts);
+        out.writeGenericValue(unlikeTexts);
+        writeItems(likeItems, out);
+        writeItems(unlikeItems, out);
+        out.writeVInt(maxQueryTerms);
+        out.writeVInt(minTermFreq);
+        out.writeVInt(minDocFreq);
+        out.writeVInt(maxDocFreq);
+        out.writeVInt(minWordLength);
+        out.writeVInt(maxWordLength);
+        out.writeOptionalStringArray(stopWords);
+        out.writeOptionalString(analyzer);
+        out.writeString(minimumShouldMatch);
+        out.writeGenericValue(boostTerms);
+        out.writeBoolean(include);
+        out.writeBoolean(failOnUnsupportedField);
+    }
+
+    private static void writeItems(List<Item> items, StreamOutput out) throws IOException {
+        out.writeVInt(items.size());
+        for (Item item : items) {
+            item.writeTo(out);
+        }
+    }
+
+    @Override
+    protected int doHashCode() {
+        return Objects.hash(fields, likeTexts, unlikeTexts, likeItems, unlikeItems, maxQueryTerms, minTermFreq,
+                minDocFreq, maxDocFreq, minWordLength, maxWordLength, Arrays.hashCode(stopWords), analyzer, minimumShouldMatch,
+                boostTerms, include, failOnUnsupportedField);
+    }
+
+    @Override
+    protected boolean doEquals(MoreLikeThisQueryBuilder other) {
+        return Objects.equals(fields, other.fields) &&
+                Objects.equals(likeTexts, other.likeTexts) &&
+                Objects.equals(unlikeTexts, other.unlikeTexts) &&
+                Objects.equals(likeItems, other.likeItems) &&
+                Objects.equals(unlikeItems, other.unlikeItems) &&
+                Objects.equals(maxQueryTerms, other.maxQueryTerms) &&
+                Objects.equals(minTermFreq, other.minTermFreq) &&
+                Objects.equals(minDocFreq, other.minDocFreq) &&
+                Objects.equals(maxDocFreq, other.maxDocFreq) &&
+                Objects.equals(minWordLength, other.minWordLength) &&
+                Objects.equals(maxWordLength, other.maxWordLength) &&
+                Arrays.equals(stopWords, other.stopWords) && // otherwise we are comparing pointers
+                Objects.equals(analyzer, other.analyzer) &&
+                Objects.equals(minimumShouldMatch, other.minimumShouldMatch) &&
+                Objects.equals(boostTerms, other.boostTerms) &&
+                Objects.equals(include, other.include) &&
+                Objects.equals(failOnUnsupportedField, other.failOnUnsupportedField);
+    }
 }
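With defaults now materialized as constants and the add* mutators gone, the builder is a plain value object that the parser can populate in one fluent chain, while all shard-level work lives in doToQuery. A usage sketch built only from methods visible in this diff:

    MoreLikeThisQueryBuilder mltBuilder = new MoreLikeThisQueryBuilder("title", "description")
            .like("Some interesting text")
            .unlike("text to give less weight to")
            .maxQueryTerms(12)
            .minTermFreq(1)
            .include(false);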
@@ -19,43 +19,20 @@
 
 package org.elasticsearch.index.query;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.queries.TermsQuery;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.action.termvectors.*;
-import org.elasticsearch.client.Client;
-import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.ParseField;
-import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.index.analysis.Analysis;
-import org.elasticsearch.index.mapper.MappedFieldType;
-import org.elasticsearch.index.mapper.internal.UidFieldMapper;
 import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
-import org.elasticsearch.search.internal.SearchContext;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
-import java.util.Set;
-
-import static org.elasticsearch.index.mapper.Uid.createUidAsBytes;
 
 /**
  * Parser for the More Like This Query (MLT Query) which finds documents that are "like" a given set of documents.
  *
  * The documents are provided as a set of strings and/or a list of {@link Item}.
  */
-public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
+public class MoreLikeThisQueryParser extends BaseQueryParser<MoreLikeThisQueryBuilder> {
 
     public interface Field {
         ParseField FIELDS = new ParseField("fields");
@@ -84,23 +61,34 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
     }
 
     @Override
-    public Query parse(QueryShardContext context) throws IOException, QueryParsingException {
-        QueryParseContext parseContext = context.parseContext();
+    public MoreLikeThisQueryBuilder fromXContent(QueryParseContext parseContext) throws IOException, QueryParsingException {
         XContentParser parser = parseContext.parser();
 
-        MoreLikeThisQuery mltQuery = new MoreLikeThisQuery();
-        mltQuery.setSimilarity(context.searchSimilarity());
-
+        // document inputs
+        List<String> fields = null;
         List<String> likeTexts = new ArrayList<>();
         List<String> unlikeTexts = new ArrayList<>();
         List<Item> likeItems = new ArrayList<>();
         List<Item> unlikeItems = new ArrayList<>();
 
-        List<String> moreLikeFields = null;
-        Analyzer analyzer = null;
-        boolean include = false;
+        // term selection parameters
+        int maxQueryTerms = MoreLikeThisQueryBuilder.DEFAULT_MAX_QUERY_TERMS;
+        int minTermFreq = MoreLikeThisQueryBuilder.DEFAULT_MIN_TERM_FREQ;
+        int minDocFreq = MoreLikeThisQueryBuilder.DEFAULT_MIN_DOC_FREQ;
+        int maxDocFreq = MoreLikeThisQueryBuilder.DEFAULT_MAX_DOC_FREQ;
+        int minWordLength = MoreLikeThisQueryBuilder.DEFAULT_MIN_WORD_LENGTH;
+        int maxWordLength = MoreLikeThisQueryBuilder.DEFAULT_MAX_WORD_LENGTH;
+        List<String> stopWords = null;
+        String analyzer = null;
 
-        boolean failOnUnsupportedField = true;
+        // query formation parameters
+        String minimumShouldMatch = MoreLikeThisQueryBuilder.DEFAULT_MINIMUM_SHOULD_MATCH;
+        float boostTerms = MoreLikeThisQueryBuilder.DEFAULT_BOOST_TERMS;
+        boolean include = MoreLikeThisQueryBuilder.DEFAULT_INCLUDE;
+
+        // other parameters
+        boolean failOnUnsupportedField = MoreLikeThisQueryBuilder.DEFAULT_FAIL_ON_UNSUPPORTED_FIELDS;
+        float boost = AbstractQueryBuilder.DEFAULT_BOOST;
         String queryName = null;
 
         XContentParser.Token token;
@@ -116,37 +104,29 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE_TEXT)) {
                     likeTexts.add(parser.text());
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_QUERY_TERMS)) {
-                    mltQuery.setMaxQueryTerms(parser.intValue());
+                    maxQueryTerms = parser.intValue();
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_TERM_FREQ)) {
-                    mltQuery.setMinTermFrequency(parser.intValue());
+                    minTermFreq = parser.intValue();
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_DOC_FREQ)) {
-                    mltQuery.setMinDocFreq(parser.intValue());
+                    minDocFreq = parser.intValue();
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_DOC_FREQ)) {
-                    mltQuery.setMaxDocFreq(parser.intValue());
+                    maxDocFreq = parser.intValue();
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MIN_WORD_LENGTH)) {
-                    mltQuery.setMinWordLen(parser.intValue());
+                    minWordLength = parser.intValue();
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MAX_WORD_LENGTH)) {
-                    mltQuery.setMaxWordLen(parser.intValue());
+                    maxWordLength = parser.intValue();
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.ANALYZER)) {
-                    analyzer = context.analysisService().analyzer(parser.text());
+                    analyzer = parser.text();
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MINIMUM_SHOULD_MATCH)) {
-                    mltQuery.setMinimumShouldMatch(parser.text());
+                    minimumShouldMatch = parser.text();
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.BOOST_TERMS)) {
-                    float boostFactor = parser.floatValue();
-                    if (boostFactor != 0) {
-                        mltQuery.setBoostTerms(true);
-                        mltQuery.setBoostTermsFactor(boostFactor);
-                    }
-                } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.MINIMUM_SHOULD_MATCH)) {
-                    mltQuery.setMinimumShouldMatch(parser.text());
-                } else if ("analyzer".equals(currentFieldName)) {
-                    analyzer = context.analysisService().analyzer(parser.text());
+                    boostTerms = parser.floatValue();
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.INCLUDE)) {
                     include = parser.booleanValue();
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.FAIL_ON_UNSUPPORTED_FIELD)) {
                     failOnUnsupportedField = parser.booleanValue();
                 } else if ("boost".equals(currentFieldName)) {
-                    mltQuery.setBoost(parser.floatValue());
+                    boost = parser.floatValue();
                 } else if ("_name".equals(currentFieldName)) {
                     queryName = parser.text();
                 } else {
@@ -154,11 +134,9 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
                 }
             } else if (token == XContentParser.Token.START_ARRAY) {
                 if (parseContext.parseFieldMatcher().match(currentFieldName, Field.FIELDS)) {
-                    moreLikeFields = new LinkedList<>();
+                    fields = new ArrayList<>();
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
-                        String field = parser.text();
-                        MappedFieldType fieldType = context.fieldMapper(field);
-                        moreLikeFields.add(fieldType == null ? field : fieldType.names().indexName());
+                        fields.add(parser.text());
                     }
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.LIKE)) {
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
@@ -183,11 +161,10 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
                         likeItems.add(Item.parse(parser, parseContext.parseFieldMatcher(), new Item()));
                     }
                 } else if (parseContext.parseFieldMatcher().match(currentFieldName, Field.STOP_WORDS)) {
-                    Set<String> stopWords = new HashSet<>();
+                    stopWords = new ArrayList<>();
                     while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                         stopWords.add(parser.text());
                     }
-                    mltQuery.setStopWords(stopWords);
                 } else {
                     throw new QueryParsingException(parseContext, "[mlt] query does not support [" + currentFieldName + "]");
                 }
@@ -205,48 +182,32 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
         if (likeTexts.isEmpty() && likeItems.isEmpty()) {
             throw new QueryParsingException(parseContext, "more_like_this requires 'like' to be specified");
         }
-        if (moreLikeFields != null && moreLikeFields.isEmpty()) {
+        if (fields != null && fields.isEmpty()) {
             throw new QueryParsingException(parseContext, "more_like_this requires 'fields' to be non-empty");
         }
 
-        // set analyzer
-        if (analyzer == null) {
-            analyzer = context.mapperService().searchAnalyzer();
-        }
-        mltQuery.setAnalyzer(analyzer);
-
-        // set like text fields
-        boolean useDefaultField = (moreLikeFields == null);
-        if (useDefaultField) {
-            moreLikeFields = Collections.singletonList(context.defaultField());
-        }
-
-        // possibly remove unsupported fields
-        removeUnsupportedFields(moreLikeFields, analyzer, failOnUnsupportedField);
-        if (moreLikeFields.isEmpty()) {
-            return null;
-        }
-        mltQuery.setMoreLikeFields(moreLikeFields.toArray(Strings.EMPTY_ARRAY));
-
-        // support for named query
-        if (queryName != null) {
-            context.addNamedQuery(queryName, mltQuery);
-        }
-
-        // handle like texts
-        if (!likeTexts.isEmpty()) {
-            mltQuery.setLikeText(likeTexts);
-        }
-        if (!unlikeTexts.isEmpty()) {
-            mltQuery.setUnlikeText(unlikeTexts);
-        }
-
-        // handle items
-        if (!likeItems.isEmpty()) {
-            return handleItems(context, mltQuery, likeItems, unlikeItems, include, moreLikeFields, useDefaultField);
-        } else {
-            return mltQuery;
+        MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = new MoreLikeThisQueryBuilder(fields)
+                .like(likeTexts.toArray(new String[likeTexts.size()]))
+                .unlike(unlikeTexts.toArray(new String[unlikeTexts.size()]))
+                .like(likeItems.toArray(new Item[likeItems.size()]))
+                .unlike(unlikeItems.toArray(new Item[unlikeItems.size()]))
+                .maxQueryTerms(maxQueryTerms)
+                .minTermFreq(minTermFreq)
+                .minDocFreq(minDocFreq)
+                .maxDocFreq(maxDocFreq)
+                .minWordLength(minWordLength)
+                .maxWordLength(maxWordLength)
+                .analyzer(analyzer)
+                .minimumShouldMatch(minimumShouldMatch)
+                .boostTerms(boostTerms)
+                .include(include)
+                .failOnUnsupportedField(failOnUnsupportedField)
+                .boost(boost)
+                .queryName(queryName);
+        if (stopWords != null) {
+            moreLikeThisQueryBuilder.stopWords(stopWords);
         }
+        return moreLikeThisQueryBuilder;
     }
 
     private static void parseLikeField(QueryParseContext parseContext, List<String> texts, List<Item> items) throws IOException {
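fromXContent now only collects raw values and hands every default and validation decision to the builder; the query JSON it consumes is unchanged. An illustrative request body, assuming the customary snake_case parameter names of the mlt query (only "fields" is confirmed by the ParseField shown in this diff):

    {
      "mlt": {
        "fields": ["title", "description"],
        "like": ["Some interesting text"],
        "min_term_freq": 1,
        "max_query_terms": 12
      }
    }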
@ -260,139 +221,8 @@ public class MoreLikeThisQueryParser extends BaseQueryParserTemp {
|
|||
}
|
||||
}
|
||||
|
||||
private static List<String> removeUnsupportedFields(List<String> moreLikeFields, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
|
||||
for (Iterator<String> it = moreLikeFields.iterator(); it.hasNext(); ) {
|
||||
final String fieldName = it.next();
|
||||
if (!Analysis.generatesCharacterTokenStream(analyzer, fieldName)) {
|
||||
if (failOnUnsupportedField) {
|
                    throw new IllegalArgumentException("more_like_this doesn't support binary/numeric fields: [" + fieldName + "]");
                } else {
                    it.remove();
                }
            }
        }
        return moreLikeFields;
    }

    private Query handleItems(QueryShardContext context, MoreLikeThisQuery mltQuery, List<Item> likeItems, List<Item> unlikeItems,
                              boolean include, List<String> moreLikeFields, boolean useDefaultField) throws IOException {

        QueryParseContext parseContext = context.parseContext();
        // set default index, type and fields if not specified
        for (Item item : likeItems) {
            setDefaultIndexTypeFields(parseContext, item, moreLikeFields, useDefaultField);
        }
        for (Item item : unlikeItems) {
            setDefaultIndexTypeFields(parseContext, item, moreLikeFields, useDefaultField);
        }

        // fetching the items with multi-termvectors API
        MultiTermVectorsResponse responses = fetchResponse(context.getClient(), likeItems, unlikeItems, SearchContext.current());

        // getting the Fields for liked items
        mltQuery.setLikeText(getFieldsFor(responses, likeItems));

        // getting the Fields for unliked items
        if (!unlikeItems.isEmpty()) {
            org.apache.lucene.index.Fields[] unlikeFields = getFieldsFor(responses, unlikeItems);
            if (unlikeFields.length > 0) {
                mltQuery.setUnlikeText(unlikeFields);
            }
        }

        BooleanQuery boolQuery = new BooleanQuery();
        boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);

        // exclude the items from the search
        if (!include) {
            handleExclude(boolQuery, likeItems);
        }
        return boolQuery;
    }

    private static void setDefaultIndexTypeFields(QueryParseContext parseContext, Item item, List<String> moreLikeFields,
                                                  boolean useDefaultField) {
        if (item.index() == null) {
            item.index(parseContext.index().name());
        }
        if (item.type() == null) {
            if (parseContext.shardContext().queryTypes().size() > 1) {
                throw new QueryParsingException(parseContext,
                        "ambiguous type for item with id: " + item.id() + " and index: " + item.index());
            } else {
                item.type(parseContext.shardContext().queryTypes().iterator().next());
            }
        }
        // default fields if not present but don't override for artificial docs
        if ((item.fields() == null || item.fields().length == 0) && item.doc() == null) {
            if (useDefaultField) {
                item.fields("*");
            } else {
                item.fields(moreLikeFields.toArray(new String[moreLikeFields.size()]));
            }
        }
    }

    private static void handleExclude(BooleanQuery boolQuery, List<Item> likeItems) {
        // artificial docs get assigned a random id and should be disregarded
        List<BytesRef> uids = new ArrayList<>();
        for (Item item : likeItems) {
            if (item.doc() != null) {
                continue;
            }
            uids.add(createUidAsBytes(item.type(), item.id()));
        }
        if (!uids.isEmpty()) {
            TermsQuery query = new TermsQuery(UidFieldMapper.NAME, uids.toArray(new BytesRef[0]));
            boolQuery.add(query, BooleanClause.Occur.MUST_NOT);
        }
    }

    @Override
    public MoreLikeThisQueryBuilder getBuilderPrototype() {
        return MoreLikeThisQueryBuilder.PROTOTYPE;
    }

    private MultiTermVectorsResponse fetchResponse(Client client, List<Item> likeItems, @Nullable List<Item> unlikeItems,
                                                   SearchContext searchContext) throws IOException {
        MultiTermVectorsRequest request = new MultiTermVectorsRequest();
        for (Item item : likeItems) {
            request.add(item.toTermVectorsRequest());
        }
        if (unlikeItems != null) {
            for (Item item : unlikeItems) {
                request.add(item.toTermVectorsRequest());
            }
        }
        request.copyContextAndHeadersFrom(searchContext);
        return client.multiTermVectors(request).actionGet();
    }

    private static Fields[] getFieldsFor(MultiTermVectorsResponse responses, List<Item> items) throws IOException {
        List<Fields> likeFields = new ArrayList<>();

        Set<Item> selectedItems = new HashSet<>();
        for (Item request : items) {
            selectedItems.add(new Item(request.index(), request.type(), request.id()));
        }

        for (MultiTermVectorsItemResponse response : responses) {
            if (!hasResponseFromRequest(response, selectedItems)) {
                continue;
            }
            if (response.isFailed()) {
                continue;
            }
            TermVectorsResponse getResponse = response.getResponse();
            if (!getResponse.isExists()) {
                continue;
            }
            likeFields.add(getResponse.getFields());
        }
        return likeFields.toArray(Fields.EMPTY_ARRAY);
    }

    private static boolean hasResponseFromRequest(MultiTermVectorsItemResponse response, Set<Item> selectedItems) {
        return selectedItems.contains(new Item(response.getIndex(), response.getType(), response.getId()));
    }
}
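Editor's note: the exclusion handling above is easy to miss when skimming the diff. Each indexed like-item is reduced to a `_uid` term and the whole set is negated next to the scoring clause. A minimal standalone sketch of that query shape (not part of this commit; the MatchAllDocsQuery stand-in, the literal "_uid" field name, and the "type#id" uid values are illustrative assumptions):

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;

public class MltExcludeSketch {
    public static Query sketch() {
        // stand-in for the MoreLikeThisQuery scoring clause built in handleItems()
        Query mltQuery = new MatchAllDocsQuery();

        // liked documents, encoded the way handleExclude() builds uids ("type#id")
        List<BytesRef> uids = new ArrayList<>();
        uids.add(new BytesRef("type1#1"));
        uids.add(new BytesRef("type1#2"));

        BooleanQuery boolQuery = new BooleanQuery();
        boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
        // MUST_NOT over the liked uids keeps the input documents out of the hits
        boolQuery.add(new TermsQuery("_uid", uids.toArray(new BytesRef[0])), BooleanClause.Occur.MUST_NOT);
        return boolQuery;
    }
}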
@ -20,7 +20,6 @@
package org.elasticsearch.index.query;

import com.google.common.collect.ImmutableMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.queryparser.classic.QueryParserSettings;

@ -33,21 +32,15 @@ import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.geo.builders.ShapeBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.ContentPath;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperBuilders;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.mapper.object.ObjectMapper;
import org.elasticsearch.index.query.support.NestedScope;
import org.elasticsearch.indices.cache.query.terms.TermsLookup;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptService;

@ -56,11 +49,9 @@ import org.elasticsearch.search.fetch.innerhits.InnerHitsContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
@ -0,0 +1,289 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.query;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.termvectors.*;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import org.hamcrest.Matchers;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;

import static org.hamcrest.Matchers.is;

public class MoreLikeThisQueryBuilderTests extends AbstractQueryTestCase<MoreLikeThisQueryBuilder> {

    private static String[] randomFields;
    private static Item[] randomLikeItems;
    private static Item[] randomUnlikeItems;

    @Before
    public void setup() {
        // MLT only supports string fields, unsupported fields are tested below
        randomFields = randomStringFields();
        // we also preset the item requests
        randomLikeItems = new Item[randomIntBetween(1, 3)];
        for (int i = 0; i < randomLikeItems.length; i++) {
            randomLikeItems[i] = generateRandomItem();
        }
        // and for the unlike items too
        randomUnlikeItems = new Item[randomIntBetween(1, 3)];
        for (int i = 0; i < randomUnlikeItems.length; i++) {
            randomUnlikeItems[i] = generateRandomItem();
        }
    }

    private static String[] randomStringFields() {
        String[] mappedStringFields = new String[]{STRING_FIELD_NAME, STRING_FIELD_NAME_2};
        String[] unmappedStringFields = generateRandomStringArray(2, 5, false, false);
        return Stream.concat(Arrays.stream(mappedStringFields), Arrays.stream(unmappedStringFields)).toArray(String[]::new);
    }

    private Item generateRandomItem() {
        String index = randomBoolean() ? getIndex().getName() : null;
        String type = getRandomType();  // set to one type to avoid ambiguous types
        // indexed item or artificial document
        Item item;
        if (randomBoolean()) {
            item = new Item(index, type, randomAsciiOfLength(10));
        } else {
            item = new Item(index, type, randomArtificialDoc());
        }
        // if no field is specified MLT uses all mapped fields for this item
        if (randomBoolean()) {
            item.fields(randomFrom(randomFields));
        }
        // per field analyzer
        if (randomBoolean()) {
            item.perFieldAnalyzer(randomPerFieldAnalyzer());
        }
        if (randomBoolean()) {
            item.routing(randomAsciiOfLength(10));
        }
        if (randomBoolean()) {
            item.version(randomInt(5));
        }
        if (randomBoolean()) {
            item.versionType(randomFrom(VersionType.values()));
        }
        return item;
    }

    private XContentBuilder randomArtificialDoc() {
        XContentBuilder doc;
        try {
            doc = XContentFactory.jsonBuilder().startObject();
            for (String field : randomFields) {
                doc.field(field, randomAsciiOfLength(10));
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Unable to generate random artificial doc!");
        }
        return doc;
    }

    private Map<String, String> randomPerFieldAnalyzer() {
        Map<String, String> perFieldAnalyzer = new HashMap<>();
        for (String field : randomFields) {
            perFieldAnalyzer.put(field, randomAnalyzer());
        }
        return perFieldAnalyzer;
    }

    @Override
    protected MoreLikeThisQueryBuilder doCreateTestQueryBuilder() {
        MoreLikeThisQueryBuilder queryBuilder;
        if (randomBoolean()) { // for the default field
            queryBuilder = new MoreLikeThisQueryBuilder();
        } else {
            queryBuilder = new MoreLikeThisQueryBuilder(randomFields);
        }
        // like field is required
        if (randomBoolean()) {
            queryBuilder.like(generateRandomStringArray(5, 5, false, false));
        } else {
            queryBuilder.like(randomLikeItems);
        }
        if (randomBoolean()) {
            queryBuilder.unlike(generateRandomStringArray(5, 5, false, false));
        }
        if (randomBoolean()) {
            queryBuilder.unlike(randomUnlikeItems);
        }
        if (randomBoolean()) {
            queryBuilder.maxQueryTerms(randomInt(25));
        }
        if (randomBoolean()) {
            queryBuilder.minTermFreq(randomInt(5));
        }
        if (randomBoolean()) {
            queryBuilder.minDocFreq(randomInt(5));
        }
        if (randomBoolean()) {
            queryBuilder.maxDocFreq(randomInt(100));
        }
        if (randomBoolean()) {
            queryBuilder.minWordLength(randomInt(5));
        }
        if (randomBoolean()) {
            queryBuilder.maxWordLength(randomInt(25));
        }
        if (randomBoolean()) {
            queryBuilder.stopWords(generateRandomStringArray(5, 5, false, false));
        }
        if (randomBoolean()) {
            queryBuilder.analyzer(randomAnalyzer());  // fix the analyzer?
        }
        if (randomBoolean()) {
            queryBuilder.minimumShouldMatch(randomMinimumShouldMatch());
        }
        if (randomBoolean()) {
            queryBuilder.boostTerms(randomFloat() * 10);
        }
        if (randomBoolean()) {
            queryBuilder.include(randomBoolean());
        }
        if (randomBoolean()) {
            queryBuilder.failOnUnsupportedField(randomBoolean());
        }
        return queryBuilder;
    }

    @Override
    protected MultiTermVectorsResponse executeMultiTermVectors(MultiTermVectorsRequest mtvRequest) {
        try {
            MultiTermVectorsItemResponse[] responses = new MultiTermVectorsItemResponse[mtvRequest.size()];
            int i = 0;
            for (TermVectorsRequest request : mtvRequest) {
                TermVectorsResponse response = new TermVectorsResponse(request.index(), request.type(), request.id());
                response.setExists(true);
                Fields generatedFields;
                if (request.doc() != null) {
                    generatedFields = generateFields(randomFields, request.doc().toUtf8());
                } else {
                    generatedFields = generateFields(request.selectedFields().toArray(new String[0]), request.id());
                }
                EnumSet<TermVectorsRequest.Flag> flags = EnumSet.of(TermVectorsRequest.Flag.Positions, TermVectorsRequest.Flag.Offsets);
                response.setFields(generatedFields, request.selectedFields(), flags, generatedFields);
                responses[i++] = new MultiTermVectorsItemResponse(response, null);
            }
            return new MultiTermVectorsResponse(responses);
        } catch (IOException ex) {
            throw new ElasticsearchException("boom", ex);
        }
    }

    /**
     * Here we could go overboard and use a pre-generated indexed random document for a given Item,
     * but for now we'd prefer to simply return the id as the content of the document, and to do
     * that for every field.
     */
    private static Fields generateFields(String[] fieldNames, String text) throws IOException {
        MemoryIndex index = new MemoryIndex();
        for (String fieldName : fieldNames) {
            index.addField(fieldName, text, new WhitespaceAnalyzer());
        }
        return MultiFields.getFields(index.createSearcher().getIndexReader());
    }

    @Override
    protected void doAssertLuceneQuery(MoreLikeThisQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {
        if (!queryBuilder.likeItems().isEmpty()) {
            assertThat(query, Matchers.instanceOf(BooleanQuery.class));
        } else {
            // we rely on integration tests for a deeper check here
            assertThat(query, Matchers.instanceOf(MoreLikeThisQuery.class));
        }
    }

    @Test
    public void testValidate() {
        MoreLikeThisQueryBuilder queryBuilder = new MoreLikeThisQueryBuilder(Strings.EMPTY_ARRAY);
        assertThat(queryBuilder.validate().validationErrors().size(), is(2));

        queryBuilder = new MoreLikeThisQueryBuilder(Strings.EMPTY_ARRAY).like("some text");
        assertThat(queryBuilder.validate().validationErrors().size(), is(1));

        queryBuilder = new MoreLikeThisQueryBuilder("field").like(Strings.EMPTY_ARRAY);
        assertThat(queryBuilder.validate().validationErrors().size(), is(1));

        queryBuilder = new MoreLikeThisQueryBuilder("field").like(Item.EMPTY_ARRAY);
        assertThat(queryBuilder.validate().validationErrors().size(), is(1));

        queryBuilder = new MoreLikeThisQueryBuilder("field").like("some text");
        assertNull(queryBuilder.validate());
    }

    @Test
    public void testUnsupportedFields() throws IOException {
        assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
        String unsupportedField = randomFrom(INT_FIELD_NAME, DOUBLE_FIELD_NAME, DATE_FIELD_NAME);
        MoreLikeThisQueryBuilder queryBuilder = new MoreLikeThisQueryBuilder(unsupportedField)
                .like("some text")
                .failOnUnsupportedField(true);
        try {
            queryBuilder.toQuery(createShardContext());
            fail("should have failed with IllegalArgumentException for field: " + unsupportedField);
        } catch (IllegalArgumentException e) {
            assertThat(e.getMessage(), Matchers.containsString("more_like_this doesn't support binary/numeric fields"));
        }
    }

    @Test
    public void testItemSerialization() throws IOException {
        Item expectedItem = generateRandomItem();
        BytesStreamOutput output = new BytesStreamOutput();
        expectedItem.writeTo(output);
        Item newItem = Item.readItemFrom(StreamInput.wrap(output.bytes()));
        assertEquals(expectedItem, newItem);
    }

    @Test
    public void testItemFromXContent() throws IOException {
        Item expectedItem = generateRandomItem();
        String json = expectedItem.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS).string();
        XContentParser parser = XContentFactory.xContent(json).createParser(json);
        Item newItem = Item.parse(parser, ParseFieldMatcher.STRICT, new Item());
        assertEquals(expectedItem, newItem);
    }
}
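Editor's note: the `generateFields` helper above gives every requested field the same single-token content. A self-contained sketch (not part of this commit; field names and the sample text are made up) that reproduces the trick and prints the indexed terms back out:

import java.io.IOException;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.util.BytesRef;

public class GenerateFieldsSketch {
    public static void main(String[] args) throws IOException {
        MemoryIndex index = new MemoryIndex();
        // every field gets the same text, just like generateFields() above
        for (String field : new String[]{"field1", "field2"}) {
            index.addField(field, "some-id", new WhitespaceAnalyzer());
        }
        Fields fields = MultiFields.getFields(index.createSearcher().getIndexReader());
        for (String field : fields) {
            Terms terms = fields.terms(field);
            TermsEnum termsEnum = terms.iterator();
            // prints: field1 -> some-id, then field2 -> some-id
            for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
                System.out.println(field + " -> " + term.utf8ToString());
            }
        }
    }
}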
@ -1,60 +0,0 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.morelikethis;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import org.elasticsearch.test.ESTestCase;
import org.junit.Test;

import java.util.Random;

public class ItemSerializationTests extends ESTestCase {

    private Item generateRandomItem(int arraySize, int stringSize) {
        String index = randomAsciiOfLength(stringSize);
        String type = randomAsciiOfLength(stringSize);
        String id = String.valueOf(Math.abs(randomInt()));
        String[] fields = generateRandomStringArray(arraySize, stringSize, true);
        String routing = randomBoolean() ? randomAsciiOfLength(stringSize) : null;
        long version = Math.abs(randomLong());
        VersionType versionType = RandomPicks.randomFrom(new Random(), VersionType.values());
        return new Item(index, type, id).fields(fields).routing(routing).version(version).versionType(versionType);
    }

    @Test
    public void testItemSerialization() throws Exception {
        int numOfTrials = 100;
        int maxArraySize = 7;
        int maxStringSize = 8;
        for (int i = 0; i < numOfTrials; i++) {
            Item item1 = generateRandomItem(maxArraySize, maxStringSize);
            String json = item1.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS).string();
            XContentParser parser = XContentFactory.xContent(json).createParser(json);
            Item item2 = Item.parse(parser, ParseFieldMatcher.STRICT, new Item());
            assertEquals(item1, item2);
        }
    }
}
@ -72,7 +72,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        logger.info("Running moreLikeThis");
        SearchResponse response = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
+               new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 1l);
    }

@ -92,7 +92,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        logger.info("Running moreLikeThis");
        SearchResponse response = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
+               new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 0l);
    }

@ -119,24 +119,24 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        logger.info("Running moreLikeThis on index");
        SearchResponse response = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
+               new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 2l);

        logger.info("Running moreLikeThis on beta shard");
        response = client().prepareSearch("beta").setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
+               new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 1l);
        assertThat(response.getHits().getAt(0).id(), equalTo("3"));

        logger.info("Running moreLikeThis on release shard");
        response = client().prepareSearch("release").setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
+               new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 1l);
        assertThat(response.getHits().getAt(0).id(), equalTo("2"));

        logger.info("Running moreLikeThis on alias with node client");
        response = internalCluster().clientNodeClient().prepareSearch("beta").setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
+               new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(response, 1l);
        assertThat(response.getHits().getAt(0).id(), equalTo("3"));
    }

@ -156,11 +156,11 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

        SearchResponse response = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1"))).get();
+               new MoreLikeThisQueryBuilder().like(new Item("foo", "bar", "1"))).get();
        assertNoFailures(response);
        assertThat(response, notNullValue());
        response = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1"))).get();
+               new MoreLikeThisQueryBuilder().like(new Item("foo", "bar", "1"))).get();
        assertNoFailures(response);
        assertThat(response, notNullValue());
    }

@ -182,7 +182,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        client().admin().indices().prepareRefresh("foo").execute().actionGet();

        SearchResponse response = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1").routing("2"))).get();
+               new MoreLikeThisQueryBuilder().like(new Item("foo", "bar", "1").routing("2"))).get();
        assertNoFailures(response);
        assertThat(response, notNullValue());
    }

@ -205,7 +205,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
                .execute().actionGet();
        client().admin().indices().prepareRefresh("foo").execute().actionGet();
        SearchResponse response = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1").routing("4000"))).get();
+               new MoreLikeThisQueryBuilder().like(new Item("foo", "bar", "1").routing("4000"))).get();
        assertNoFailures(response);
        assertThat(response, notNullValue());
    }

@ -233,12 +233,12 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        // Implicit list of fields -> ignore numeric fields
        SearchResponse searchResponse = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)).get();
+               new MoreLikeThisQueryBuilder().like(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)).get();
        assertHitCount(searchResponse, 1l);

        // Explicit list of fields including numeric fields -> fail
        assertThrows(client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder("string_value", "int_value").addLikeItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class);
+               new MoreLikeThisQueryBuilder("string_value", "int_value").like(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class);

        // mlt query with no field -> OK
        searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery().likeText("index").minTermFreq(1).minDocFreq(1)).execute().actionGet();

@ -295,16 +295,16 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        logger.info("Running More Like This with include true");
        SearchResponse response = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
+               new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
        assertOrderedSearchHits(response, "1", "2");

        response = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "2")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
+               new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "2")).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get();
        assertOrderedSearchHits(response, "2", "1");

        logger.info("Running More Like This with include false");
        response = client().prepareSearch().setQuery(
-               new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%")).get();
+               new MoreLikeThisQueryBuilder().like(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%")).get();
        assertSearchHits(response, "2");
    }

@ -355,7 +355,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {

        logger.info("Running MoreLikeThis");
        MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").include(true).minTermFreq(1).minDocFreq(1)
-               .addLikeItem(new Item("test", "type0", "0"));
+               .like(new Item("test", "type0", "0"));

        String[] types = new String[numOfTypes];
        for (int i = 0; i < numOfTypes; i++) {

@ -573,7 +573,7 @@ public class MoreLikeThisIT extends ESIntegTestCase {
        docs.add(new Item("test", "type1", i+""));
        mltQuery = moreLikeThisQuery()
                .like(new Item("test", "type1", doc))
-               .ignoreLike(docs.toArray(Item.EMPTY_ARRAY))
+               .unlike(docs.toArray(Item.EMPTY_ARRAY))
                .minTermFreq(0)
                .minDocFreq(0)
                .maxQueryTerms(100)
@ -230,7 +230,7 @@ public class ContextAndHeaderTransportIT extends ESIntegTestCase {
        transportClient().admin().indices().prepareRefresh(lookupIndex, queryIndex).get();

        MoreLikeThisQueryBuilder moreLikeThisQueryBuilder = QueryBuilders.moreLikeThisQuery("name")
-               .addLikeItem(new Item(lookupIndex, "type", "1"))
+               .like(new Item(lookupIndex, "type", "1"))
                .minTermFreq(1)
                .minDocFreq(1);
@ -88,3 +88,14 @@ makes the type / path parameter mandatory.

Moving MatchQueryBuilder.Type and MatchQueryBuilder.ZeroTermsQuery enums to MatchQuery.Type.
Also reusing the new Operator enum.

==== MoreLikeThisQueryBuilder

Removed `MoreLikeThisQueryBuilder.Item#id(String id)`, `Item#doc(BytesReference doc)` and
`Item#doc(XContentBuilder doc)`. Use the provided constructors instead.

Removed `MoreLikeThisQueryBuilder#addLike` and `addUnlike` in favor of the `like`
and `unlike` methods.

The deprecated `docs(Item... docs)`, `ignoreLike(Item... docs)`,
`ignoreLike(String... likeText)` and `addItem(Item... likeItems)` methods have been removed.
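As a worked example of the migration (not from the commit itself; the index, type and id values are illustrative, mirroring the updated integration tests above):

import org.elasticsearch.index.query.MoreLikeThisQueryBuilder;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;

public class MltMigrationSketch {
    // Before this change (methods now removed):
    //     new MoreLikeThisQueryBuilder("name")
    //             .addLikeItem(new Item("test", "type1", "1"))
    //             .ignoreLike(new Item("test", "type1", "2"));

    // After this change: like()/unlike() accept free text and/or Item references.
    public static MoreLikeThisQueryBuilder migrated() {
        return new MoreLikeThisQueryBuilder("name")
                .like(new Item("test", "type1", "1"))
                .unlike(new Item("test", "type1", "2"))
                .minTermFreq(1)
                .minDocFreq(1);
    }
}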