MLT Query: use minimum should match more extensive syntax
The minimum number of optional should clauses of the generated query that must match can now be set using the more extensive minimum should match syntax. This deprecates the `percent_terms_to_match` parameter in favor of a new `minimum_should_match` parameter. Closes #7898
This commit is contained in:
parent
03d880de38
commit
5014158d6b
|
@ -29,8 +29,9 @@ The `more_like_this_field` top level parameters include:
|
|||
|Parameter |Description
|
||||
|`like_text` |The text to find documents like it, *required*.
|
||||
|
||||
|`percent_terms_to_match` |The percentage of terms to match on (float
|
||||
value). Defaults to `0.3` (30 percent).
|
||||
|`minimum_should_match`| From the generated query, the number of terms that
|
||||
must match following the <<query-dsl-minimum-should-match,minimum should
|
||||
match syntax>>. (Defaults to `"30%"`).
|
||||
|
||||
|`min_term_freq` |The frequency below which terms will be ignored in the
|
||||
source doc. The default frequency is `2`.
|
||||
|
|
|
@ -87,8 +87,9 @@ unless specified otherwise in each `doc`.
|
|||
|`include` |When using `ids` or `docs`, specifies whether the documents should be
|
||||
included in the search. Defaults to `false`.
|
||||
|
||||
|`percent_terms_to_match` |From the generated query, the percentage of terms
|
||||
that must match (float value between 0 and 1). Defaults to `0.3` (30 percent).
|
||||
|`minimum_should_match`| From the generated query, the number of terms that
|
||||
must match following the <<query-dsl-minimum-should-match,minimum should
|
||||
match syntax>>. (Defaults to `"30%"`).
|
||||
|
||||
|`min_term_freq` |The frequency below which terms will be ignored in the
|
||||
source doc. The default frequency is `2`.
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.elasticsearch.common.bytes.BytesArray;
|
|||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.lucene.search.Queries;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.search.Scroll;
|
||||
|
@ -66,7 +67,7 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> impl
|
|||
|
||||
private String[] fields;
|
||||
|
||||
private float percentTermsToMatch = -1;
|
||||
private String minimumShouldMatch = "0%";
|
||||
private int minTermFreq = -1;
|
||||
private int maxQueryTerms = -1;
|
||||
private String[] stopWords = null;
|
||||
|
@ -211,18 +212,44 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> impl
|
|||
}
|
||||
|
||||
/**
|
||||
* The percent of the terms to match for each field. Defaults to <tt>0.3f</tt>.
|
||||
* Number of terms that must match the generated query expressed in the
|
||||
* common syntax for minimum should match. Defaults to <tt>30%</tt>.
|
||||
*
|
||||
* @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
|
||||
*/
|
||||
public MoreLikeThisRequest percentTermsToMatch(float percentTermsToMatch) {
|
||||
this.percentTermsToMatch = percentTermsToMatch;
|
||||
public MoreLikeThisRequest minimumShouldMatch(String minimumShouldMatch) {
|
||||
this.minimumShouldMatch = minimumShouldMatch;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of terms that must match the generated query expressed in the
|
||||
* common syntax for minimum should match.
|
||||
*
|
||||
* @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
|
||||
*/
|
||||
public String minimumShouldMatch() {
|
||||
return this.minimumShouldMatch;
|
||||
}
|
||||
|
||||
/**
|
||||
* The percent of the terms to match for each field. Defaults to <tt>0.3f</tt>.
|
||||
*/
|
||||
@Deprecated
|
||||
public MoreLikeThisRequest percentTermsToMatch(float percentTermsToMatch) {
|
||||
return minimumShouldMatch((int) (percentTermsToMatch * 100) + "%");
|
||||
}
|
||||
|
||||
/**
|
||||
* The percent of the terms to match for each field. Defaults to <tt>0.3f</tt>.
|
||||
*/
|
||||
@Deprecated
|
||||
public float percentTermsToMatch() {
|
||||
return this.percentTermsToMatch;
|
||||
if (minimumShouldMatch.endsWith("%")) {
|
||||
return Float.parseFloat(minimumShouldMatch.substring(0, minimumShouldMatch.indexOf("%"))) / 100;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -584,7 +611,12 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> impl
|
|||
}
|
||||
}
|
||||
|
||||
percentTermsToMatch = in.readFloat();
|
||||
if (in.getVersion().onOrAfter(Version.V_1_5_0)) {
|
||||
minimumShouldMatch(in.readString());
|
||||
} else {
|
||||
percentTermsToMatch(in.readFloat());
|
||||
}
|
||||
|
||||
minTermFreq = in.readVInt();
|
||||
maxQueryTerms = in.readVInt();
|
||||
size = in.readVInt();
|
||||
|
@ -661,7 +693,12 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> impl
|
|||
}
|
||||
}
|
||||
|
||||
out.writeFloat(percentTermsToMatch);
|
||||
if (out.getVersion().onOrAfter(Version.V_1_5_0)) {
|
||||
out.writeString(minimumShouldMatch);
|
||||
} else {
|
||||
out.writeFloat(percentTermsToMatch());
|
||||
}
|
||||
|
||||
out.writeVInt(minTermFreq);
|
||||
out.writeVInt(maxQueryTerms);
|
||||
if (stopWords == null) {
|
||||
|
|
|
@ -60,12 +60,22 @@ public class MoreLikeThisRequestBuilder extends ActionRequestBuilder<MoreLikeThi
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of terms that must match the generated query expressed in the
|
||||
* common syntax for minimum should match. Defaults to <tt>30%</tt>.
|
||||
*
|
||||
* @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
|
||||
*/
|
||||
public MoreLikeThisRequestBuilder setMinimumShouldMatch(String minimumShouldMatch) {
|
||||
request.minimumShouldMatch(minimumShouldMatch);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* The percent of the terms to match for each field. Defaults to <tt>0.3f</tt>.
|
||||
*/
|
||||
public MoreLikeThisRequestBuilder setPercentTermsToMatch(float percentTermsToMatch) {
|
||||
request.percentTermsToMatch(percentTermsToMatch);
|
||||
return this;
|
||||
return setMinimumShouldMatch((int) (percentTermsToMatch * 100) + "%");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -323,7 +323,7 @@ public class TransportMoreLikeThisAction extends HandledTransportAction<MoreLike
|
|||
private void addMoreLikeThis(MoreLikeThisRequest request, BoolQueryBuilder boolBuilder, String fieldName, String likeText, boolean failOnUnsupportedField) {
|
||||
MoreLikeThisFieldQueryBuilder mlt = moreLikeThisFieldQuery(fieldName)
|
||||
.likeText(likeText)
|
||||
.percentTermsToMatch(request.percentTermsToMatch())
|
||||
.minimumShouldMatch(request.minimumShouldMatch())
|
||||
.boostTerms(request.boostTerms())
|
||||
.minDocFreq(request.minDocFreq())
|
||||
.maxDocFreq(request.maxDocFreq())
|
||||
|
|
|
@ -42,7 +42,7 @@ import java.util.Set;
|
|||
*/
|
||||
public class MoreLikeThisQuery extends Query {
|
||||
|
||||
public static final float DEFAULT_PERCENT_TERMS_TO_MATCH = 0.3f;
|
||||
public static final String DEFAULT_MINIMUM_SHOULD_MATCH = "30%";
|
||||
|
||||
private TFIDFSimilarity similarity;
|
||||
|
||||
|
@ -50,7 +50,7 @@ public class MoreLikeThisQuery extends Query {
|
|||
private Fields[] likeFields;
|
||||
private String[] moreLikeFields;
|
||||
private Analyzer analyzer;
|
||||
private float percentTermsToMatch = DEFAULT_PERCENT_TERMS_TO_MATCH;
|
||||
private String minimumShouldMatch = DEFAULT_MINIMUM_SHOULD_MATCH;
|
||||
private int minTermFrequency = XMoreLikeThis.DEFAULT_MIN_TERM_FREQ;
|
||||
private int maxQueryTerms = XMoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
|
||||
private Set<?> stopWords = XMoreLikeThis.DEFAULT_STOP_WORDS;
|
||||
|
@ -84,7 +84,7 @@ public class MoreLikeThisQuery extends Query {
|
|||
result = 31 * result + minTermFrequency;
|
||||
result = 31 * result + minWordLen;
|
||||
result = 31 * result + Arrays.hashCode(moreLikeFields);
|
||||
result = 31 * result + Float.floatToIntBits(percentTermsToMatch);
|
||||
result = 31 * result + minimumShouldMatch.hashCode();
|
||||
result = 31 * result + (stopWords == null ? 0 : stopWords.hashCode());
|
||||
result = 31 * result + Float.floatToIntBits(getBoost());
|
||||
return result;
|
||||
|
@ -119,7 +119,7 @@ public class MoreLikeThisQuery extends Query {
|
|||
return false;
|
||||
if (!Arrays.equals(moreLikeFields, other.moreLikeFields))
|
||||
return false;
|
||||
if (percentTermsToMatch != other.percentTermsToMatch)
|
||||
if (!minimumShouldMatch.equals(other.minimumShouldMatch))
|
||||
return false;
|
||||
if (similarity == null) {
|
||||
if (other.similarity != null)
|
||||
|
@ -153,7 +153,7 @@ public class MoreLikeThisQuery extends Query {
|
|||
BooleanQuery bq = new BooleanQuery();
|
||||
if (this.likeFields != null) {
|
||||
Query mltQuery = mlt.like(this.likeFields);
|
||||
setMinimumShouldMatch((BooleanQuery) mltQuery, percentTermsToMatch);
|
||||
Queries.applyMinimumShouldMatch((BooleanQuery) mltQuery, minimumShouldMatch);
|
||||
bq.add(mltQuery, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
if (this.likeText != null) {
|
||||
|
@ -163,7 +163,7 @@ public class MoreLikeThisQuery extends Query {
|
|||
}
|
||||
//LUCENE 4 UPGRADE this maps the 3.6 behavior (only use the first field)
|
||||
Query mltQuery = mlt.like(moreLikeFields[0], readers);
|
||||
setMinimumShouldMatch((BooleanQuery) mltQuery, percentTermsToMatch);
|
||||
Queries.applyMinimumShouldMatch((BooleanQuery) mltQuery, minimumShouldMatch);
|
||||
bq.add(mltQuery, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
|
||||
|
@ -231,12 +231,24 @@ public class MoreLikeThisQuery extends Query {
|
|||
this.analyzer = analyzer;
|
||||
}
|
||||
|
||||
public float getPercentTermsToMatch() {
|
||||
return percentTermsToMatch;
|
||||
/**
|
||||
* Number of terms that must match the generated query expressed in the
|
||||
* common syntax for minimum should match.
|
||||
*
|
||||
* @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
|
||||
*/
|
||||
public String getMinimumShouldMatch() {
|
||||
return minimumShouldMatch;
|
||||
}
|
||||
|
||||
public void setPercentTermsToMatch(float percentTermsToMatch) {
|
||||
this.percentTermsToMatch = percentTermsToMatch;
|
||||
/**
|
||||
* Number of terms that must match the generated query expressed in the
|
||||
* common syntax for minimum should match. Defaults to <tt>30%</tt>.
|
||||
*
|
||||
* @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
|
||||
*/
|
||||
public void setMinimumShouldMatch(String minimumShouldMatch) {
|
||||
this.minimumShouldMatch = minimumShouldMatch;
|
||||
}
|
||||
|
||||
public int getMinTermFrequency() {
|
||||
|
@ -310,9 +322,4 @@ public class MoreLikeThisQuery extends Query {
|
|||
public void setBoostTermsFactor(float boostTermsFactor) {
|
||||
this.boostTermsFactor = boostTermsFactor;
|
||||
}
|
||||
|
||||
private static void setMinimumShouldMatch(BooleanQuery bq, float percentTermsToMatch) {
|
||||
BooleanClause[] clauses = bq.getClauses();
|
||||
bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B
|
|||
private final String name;
|
||||
|
||||
private String likeText;
|
||||
private float percentTermsToMatch = -1;
|
||||
private String minimumShouldMatch = null;
|
||||
private int minTermFreq = -1;
|
||||
private int maxQueryTerms = -1;
|
||||
private String[] stopWords = null;
|
||||
|
@ -63,12 +63,23 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of terms that must match the generated query expressed in the
|
||||
* common syntax for minimum should match. Defaults to <tt>30%</tt>.
|
||||
*
|
||||
* @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
|
||||
*/
|
||||
public MoreLikeThisFieldQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
|
||||
this.minimumShouldMatch = minimumShouldMatch;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* The percentage of terms to match. Defaults to <tt>0.3</tt>.
|
||||
*/
|
||||
@Deprecated
|
||||
public MoreLikeThisFieldQueryBuilder percentTermsToMatch(float percentTermsToMatch) {
|
||||
this.percentTermsToMatch = percentTermsToMatch;
|
||||
return this;
|
||||
return minimumShouldMatch((int) (percentTermsToMatch * 100) + "%");
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -183,8 +194,8 @@ public class MoreLikeThisFieldQueryBuilder extends BaseQueryBuilder implements B
|
|||
MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' to be provided");
|
||||
}
|
||||
builder.field(MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName(), likeText);
|
||||
if (percentTermsToMatch != -1) {
|
||||
builder.field(MoreLikeThisQueryParser.Fields.PERCENT_TERMS_TO_MATCH.getPreferredName(), percentTermsToMatch);
|
||||
if (minimumShouldMatch != null) {
|
||||
builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
|
||||
}
|
||||
if (minTermFreq != -1) {
|
||||
builder.field(MoreLikeThisQueryParser.Fields.MIN_TERM_FREQ.getPreferredName(), minTermFreq);
|
||||
|
|
|
@ -96,8 +96,10 @@ public class MoreLikeThisFieldQueryParser implements QueryParser {
|
|||
mltQuery.setBoostTerms(true);
|
||||
mltQuery.setBoostTermsFactor(boostFactor);
|
||||
}
|
||||
} else if (MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.match(currentFieldName,parseContext.parseFlags())) {
|
||||
mltQuery.setMinimumShouldMatch(parser.text());
|
||||
} else if (MoreLikeThisQueryParser.Fields.PERCENT_TERMS_TO_MATCH.match(currentFieldName,parseContext.parseFlags())) {
|
||||
mltQuery.setPercentTermsToMatch(parser.floatValue());
|
||||
mltQuery.setMinimumShouldMatch((int) (parser.floatValue() * 100) + "%");
|
||||
} else if ("analyzer".equals(currentFieldName)) {
|
||||
analyzer = parseContext.analysisService().analyzer(parser.text());
|
||||
} else if ("boost".equals(currentFieldName)) {
|
||||
|
|
|
@ -103,7 +103,7 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
|
|||
private List<String> ids = new ArrayList<>();
|
||||
private List<Item> docs = new ArrayList<>();
|
||||
private Boolean include = null;
|
||||
private float percentTermsToMatch = -1;
|
||||
private String minimumShouldMatch = null;
|
||||
private int minTermFreq = -1;
|
||||
private int maxQueryTerms = -1;
|
||||
private String[] stopWords = null;
|
||||
|
@ -161,12 +161,23 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Number of terms that must match the generated query expressed in the
|
||||
* common syntax for minimum should match. Defaults to <tt>30%</tt>.
|
||||
*
|
||||
* @see org.elasticsearch.common.lucene.search.Queries#calculateMinShouldMatch(int, String)
|
||||
*/
|
||||
public MoreLikeThisQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
|
||||
this.minimumShouldMatch = minimumShouldMatch;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* The percentage of terms to match. Defaults to <tt>0.3</tt>.
|
||||
*/
|
||||
@Deprecated
|
||||
public MoreLikeThisQueryBuilder percentTermsToMatch(float percentTermsToMatch) {
|
||||
this.percentTermsToMatch = percentTermsToMatch;
|
||||
return this;
|
||||
return minimumShouldMatch((int) (percentTermsToMatch * 100) + "%");
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -287,8 +298,8 @@ public class MoreLikeThisQueryBuilder extends BaseQueryBuilder implements Boosta
|
|||
MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName() +"' or 'docs/ids' to be provided");
|
||||
}
|
||||
builder.field(MoreLikeThisQueryParser.Fields.LIKE_TEXT.getPreferredName(), likeText);
|
||||
if (percentTermsToMatch != -1) {
|
||||
builder.field(MoreLikeThisQueryParser.Fields.PERCENT_TERMS_TO_MATCH.getPreferredName(), percentTermsToMatch);
|
||||
if (minimumShouldMatch != null) {
|
||||
builder.field(MoreLikeThisQueryParser.Fields.MINIMUM_SHOULD_MATCH.getPreferredName(), minimumShouldMatch);
|
||||
}
|
||||
if (minTermFreq != -1) {
|
||||
builder.field(MoreLikeThisQueryParser.Fields.MIN_TERM_FREQ.getPreferredName(), minTermFreq);
|
||||
|
|
|
@ -63,6 +63,7 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
public static final ParseField MIN_DOC_FREQ = new ParseField("min_doc_freq");
|
||||
public static final ParseField MAX_DOC_FREQ = new ParseField("max_doc_freq");
|
||||
public static final ParseField BOOST_TERMS = new ParseField("boost_terms");
|
||||
public static final ParseField MINIMUM_SHOULD_MATCH = new ParseField("minimum_should_match");
|
||||
public static final ParseField PERCENT_TERMS_TO_MATCH = new ParseField("percent_terms_to_match");
|
||||
public static final ParseField FAIL_ON_UNSUPPORTED_FIELD = new ParseField("fail_on_unsupported_field");
|
||||
public static final ParseField STOP_WORDS = new ParseField("stop_words");
|
||||
|
@ -124,8 +125,10 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
mltQuery.setBoostTerms(true);
|
||||
mltQuery.setBoostTermsFactor(boostFactor);
|
||||
}
|
||||
} else if (Fields.MINIMUM_SHOULD_MATCH.match(currentFieldName, parseContext.parseFlags())) {
|
||||
mltQuery.setMinimumShouldMatch(parser.text());
|
||||
} else if (Fields.PERCENT_TERMS_TO_MATCH.match(currentFieldName, parseContext.parseFlags())) {
|
||||
mltQuery.setPercentTermsToMatch(parser.floatValue());
|
||||
mltQuery.setMinimumShouldMatch((int) (parser.floatValue() * 100) + "%");
|
||||
} else if ("analyzer".equals(currentFieldName)) {
|
||||
analyzer = parseContext.analysisService().analyzer(parser.text());
|
||||
} else if ("boost".equals(currentFieldName)) {
|
||||
|
|
|
@ -56,7 +56,7 @@ public class RestMoreLikeThisAction extends BaseRestHandler {
|
|||
//needs some work if it is to be used in a REST context like this too
|
||||
// See the MoreLikeThisQueryParser constants that hold the valid syntax
|
||||
mltRequest.fields(request.paramAsStringArray("mlt_fields", null));
|
||||
mltRequest.percentTermsToMatch(request.paramAsFloat("percent_terms_to_match", -1));
|
||||
mltRequest.minimumShouldMatch(request.param("minimum_should_match", "0"));
|
||||
mltRequest.minTermFreq(request.paramAsInt("min_term_freq", -1));
|
||||
mltRequest.maxQueryTerms(request.paramAsInt("max_query_terms", -1));
|
||||
mltRequest.stopWords(request.paramAsStringArray("stop_words", null));
|
||||
|
|
|
@ -1638,7 +1638,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchSingleNodeTest {
|
|||
MoreLikeThisQuery mltQuery = (MoreLikeThisQuery) parsedQuery.getClauses()[0].getQuery();
|
||||
|
||||
// all terms must match
|
||||
mltQuery.setPercentTermsToMatch(1.0f);
|
||||
mltQuery.setMinimumShouldMatch("100%");
|
||||
mltQuery.setMinWordLen(0);
|
||||
mltQuery.setMinDocFreq(0);
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.junit.Test;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import static org.elasticsearch.client.Requests.*;
|
||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
|
||||
|
@ -407,7 +408,8 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
|
|||
logger.info("Running MoreLikeThis DSL with IDs");
|
||||
String id = String.valueOf(getRandom().nextInt(texts.length));
|
||||
Client client = client();
|
||||
MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").ids(id).minTermFreq(1).minDocFreq(1);
|
||||
MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").ids(id).minTermFreq(1).minDocFreq(1)
|
||||
.minimumShouldMatch("0%");
|
||||
SearchResponse mltResponseDSL = client.prepareSearch()
|
||||
.setSearchType(SearchType.QUERY_THEN_FETCH)
|
||||
.setTypes("type1")
|
||||
|
@ -417,7 +419,8 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
|
|||
assertSearchResponse(mltResponseDSL);
|
||||
|
||||
logger.info("Running MoreLikeThis API");
|
||||
MoreLikeThisRequest mltRequest = moreLikeThisRequest("test").type("type1").searchSize(texts.length).id(id).minTermFreq(1).minDocFreq(1);
|
||||
MoreLikeThisRequest mltRequest = moreLikeThisRequest("test").type("type1").searchSize(texts.length).id(id).minTermFreq(1).minDocFreq(1)
|
||||
.minimumShouldMatch("0%");
|
||||
SearchResponse mltResponseAPI = client.moreLikeThis(mltRequest).actionGet();
|
||||
assertSearchResponse(mltResponseAPI);
|
||||
|
||||
|
@ -523,4 +526,43 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
|
|||
assertHitCount(response, values.length);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMinimumShouldMatch() throws ExecutionException, InterruptedException {
|
||||
logger.info("Creating the index ...");
|
||||
assertAcked(prepareCreate("test")
|
||||
.addMapping("type1", "text", "type=string,analyzer=whitespace")
|
||||
.setSettings(SETTING_NUMBER_OF_SHARDS, 1));
|
||||
ensureGreen();
|
||||
|
||||
logger.info("Indexing with each doc having one less term ...");
|
||||
List<IndexRequestBuilder> builders = new ArrayList<>();
|
||||
for (int i = 0; i < 10; i++) {
|
||||
String text = "";
|
||||
for (int j = 1; j <= 10 - i; j++) {
|
||||
text += j + " ";
|
||||
}
|
||||
builders.add(client().prepareIndex("test", "type1", i + "").setSource("text", text));
|
||||
}
|
||||
indexRandom(true, builders);
|
||||
|
||||
logger.info("Testing each minimum_should_match from 0% - 100% with 10% increment ...");
|
||||
for (int i = 0; i <= 10; i++) {
|
||||
String minimumShouldMatch = (10 * i) + "%";
|
||||
MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery("text")
|
||||
.likeText("1 2 3 4 5 6 7 8 9 10")
|
||||
.minTermFreq(1)
|
||||
.minDocFreq(1)
|
||||
.minimumShouldMatch(minimumShouldMatch);
|
||||
logger.info("Testing with minimum_should_match = " + minimumShouldMatch);
|
||||
SearchResponse response = client().prepareSearch("test").setTypes("type1")
|
||||
.setQuery(mltQuery).get();
|
||||
assertSearchResponse(response);
|
||||
if (minimumShouldMatch.equals("0%")) {
|
||||
assertHitCount(response, 10);
|
||||
} else {
|
||||
assertHitCount(response, 11 - i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue