Added the ability to include the queried document for More Like This API.
By default More Like This API excludes the queried document from the response. However, when debugging or when comparing scores across different queries, it could be useful to have the best possible matched hit. So this option lets users explicitly specify the desired behavior. Closes #6067
This commit is contained in:
parent
48b7172ee7
commit
dae48d9fe8
|
@ -25,5 +25,8 @@ Rest parameters relating to search are also allowed, including
|
|||
When no `mlt_fields` are specified, all the fields of the document will
|
||||
be used in the `more_like_this` query generated.
|
||||
|
||||
By default, the queried document is excluded from the response (`include`
|
||||
set to false).
|
||||
|
||||
Note: In order to use the `mlt` feature a `mlt_field` needs to either
|
||||
be `stored`, store `term_vector` or `source` needs to be enabled.
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.elasticsearch.action.mlt;
|
|||
|
||||
import org.elasticsearch.ElasticsearchGenerationException;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.action.ActionRequest;
|
||||
import org.elasticsearch.action.ActionRequestValidationException;
|
||||
import org.elasticsearch.action.ValidateActions;
|
||||
|
@ -72,6 +73,7 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> {
|
|||
private int minWordLength = -1;
|
||||
private int maxWordLength = -1;
|
||||
private float boostTerms = -1;
|
||||
private boolean include = false;
|
||||
|
||||
private SearchType searchType = SearchType.DEFAULT;
|
||||
private int searchSize = 0;
|
||||
|
@ -313,6 +315,21 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> {
|
|||
return this.boostTerms;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether to include the queried document. Defaults to <tt>false</tt>.
*
* @param include <tt>true</tt> to include the queried document, <tt>false</tt> to exclude it
* @return this request, so that calls can be chained
|
||||
*/
|
||||
public MoreLikeThisRequest include(boolean include) {
|
||||
this.include = include;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether to include the queried document. Defaults to <tt>false</tt>.
*
* @return the current value of the include flag
|
||||
*/
|
||||
public boolean include() {
|
||||
return this.include;
|
||||
}
|
||||
|
||||
void beforeLocalFork() {
|
||||
if (searchSourceUnsafe) {
|
||||
searchSource = searchSource.copyBytesArray();
|
||||
|
@ -553,6 +570,12 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> {
|
|||
minWordLength = in.readVInt();
|
||||
maxWordLength = in.readVInt();
|
||||
boostTerms = in.readFloat();
|
||||
if (in.getVersion().onOrAfter(Version.V_1_2_0)) {
|
||||
include = in.readBoolean();
|
||||
} else {
|
||||
include = false; // hard-coded behavior until Elasticsearch 1.2
|
||||
}
|
||||
|
||||
searchType = SearchType.fromId(in.readByte());
|
||||
if (in.readBoolean()) {
|
||||
searchQueryHint = in.readString();
|
||||
|
@ -622,6 +645,9 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> {
|
|||
out.writeVInt(minWordLength);
|
||||
out.writeVInt(maxWordLength);
|
||||
out.writeFloat(boostTerms);
|
||||
if (out.getVersion().onOrAfter(Version.V_1_2_0)) {
|
||||
out.writeBoolean(include);
|
||||
}
|
||||
|
||||
out.writeByte(searchType.id());
|
||||
if (searchQueryHint == null) {
|
||||
|
|
|
@ -140,6 +140,14 @@ public class MoreLikeThisRequestBuilder extends ActionRequestBuilder<MoreLikeThi
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether to include the queried document. Defaults to <tt>false</tt>.
*
* @param include value forwarded to the underlying request's include flag
* @return this builder, so that calls can be chained
|
||||
*/
|
||||
public MoreLikeThisRequestBuilder setInclude(boolean include) {
|
||||
request.include(include);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* An optional search source request allowing to control the search request for the
|
||||
* more like this documents.
|
||||
|
|
|
@ -178,9 +178,11 @@ public class TransportMoreLikeThisAction extends TransportAction<MoreLikeThisReq
|
|||
}
|
||||
|
||||
// exclude myself
|
||||
Term uidTerm = docMapper.uidMapper().term(request.type(), request.id());
|
||||
boolBuilder.mustNot(termQuery(uidTerm.field(), uidTerm.text()));
|
||||
boolBuilder.adjustPureNegative(false);
|
||||
if (!request.include()) {
|
||||
Term uidTerm = docMapper.uidMapper().term(request.type(), request.id());
|
||||
boolBuilder.mustNot(termQuery(uidTerm.field(), uidTerm.text()));
|
||||
boolBuilder.adjustPureNegative(false);
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
listener.onFailure(e);
|
||||
return;
|
||||
|
|
|
@ -70,6 +70,7 @@ public class RestMoreLikeThisAction extends BaseRestHandler {
|
|||
mltRequest.minWordLength(request.paramAsInt("min_word_len", request.paramAsInt("min_word_length", -1)));
|
||||
mltRequest.maxWordLength(request.paramAsInt("max_word_len", request.paramAsInt("max_word_length", -1)));
|
||||
mltRequest.boostTerms(request.paramAsFloat("boost_terms", -1));
|
||||
mltRequest.include(request.paramAsBoolean("include", false));
|
||||
|
||||
mltRequest.searchType(SearchType.fromString(request.param("search_type")));
|
||||
mltRequest.searchIndices(request.paramAsStringArray("search_indices", null));
|
||||
|
|
|
@ -252,4 +252,40 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
|
|||
assertHitCount(searchResponse, 0l);
|
||||
}
|
||||
|
||||
/**
 * Exercises the include flag of the More Like This request: indexes two
 * documents that both mention "Lucene", then queries with include set to
 * true (expecting the queried document followed by the other one) and with
 * include left unset (expecting only the other document).
 */
@Test
|
||||
public void testSimpleMoreLikeInclude() throws Exception {
|
||||
logger.info("Creating index test");
|
||||
assertAcked(prepareCreate("test").addMapping("type1",
|
||||
jsonBuilder().startObject().startObject("type1").startObject("properties")
|
||||
.startObject("text").field("type", "string").endObject()
|
||||
.endObject().endObject().endObject()));
|
||||
|
||||
logger.info("Running Cluster Health");
|
||||
assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));
|
||||
|
||||
logger.info("Indexing...");
|
||||
client().index(indexRequest("test").type("type1").id("1").source(
|
||||
jsonBuilder().startObject()
|
||||
.field("text", "Apache Lucene is a free/open source information retrieval software library").endObject()))
|
||||
.actionGet();
|
||||
|
||||
client().index(indexRequest("test").type("type1").id("2").source(
|
||||
jsonBuilder().startObject()
|
||||
.field("text", "Lucene has been ported to other programming languages").endObject()))
|
||||
.actionGet();
|
||||
|
||||
// refresh before searching so the two documents indexed above can be queried
client().admin().indices().refresh(refreshRequest()).actionGet();
|
||||
|
||||
logger.info("Running More Like This with include true");
|
||||
SearchResponse mltResponse = client().moreLikeThis(
|
||||
moreLikeThisRequest("test").type("type1").id("1").minTermFreq(1).minDocFreq(1).include(true)).actionGet();
|
||||
// with include(true) the queried document "1" is expected ahead of "2"
assertOrderedSearchHits(mltResponse, "1", "2");
|
||||
|
||||
mltResponse = client().moreLikeThis(
|
||||
moreLikeThisRequest("test").type("type1").id("2").minTermFreq(1).minDocFreq(1).include(true)).actionGet();
|
||||
// same check from the other side: querying "2" expects "2" ahead of "1"
assertOrderedSearchHits(mltResponse, "2", "1");
|
||||
|
||||
logger.info("Running More Like This with include false");
|
||||
// include(...) is not called here, so the request keeps its default include value
mltResponse = client().moreLikeThis(moreLikeThisRequest("test").type("type1").id("1").minTermFreq(1).minDocFreq(1)).actionGet();
|
||||
assertSearchHits(mltResponse, "2");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue