Added the ability to include the queried document for More Like This API.

By default More Like This API excludes the queried document from the response.
However, when debugging or when comparing scores across different queries, it
could be useful to have the best possible matched hit. So this option lets users
explicitly specify the desired behavior.

Closes #6067
This commit is contained in:
Alex Ksikes 2014-05-06 16:19:23 +02:00
parent 48b7172ee7
commit dae48d9fe8
6 changed files with 79 additions and 3 deletions

View File

@ -25,5 +25,8 @@ Rest parameters relating to search are also allowed, including
When no `mlt_fields` are specified, all the fields of the document will
be used in the `more_like_this` query generated.
By default, the queried document is excluded from the response (`include`
set to false).
Note: In order to use the `mlt` feature a `mlt_field` needs to be either
be `stored`, store `term_vector` or `source` needs to be enabled.

View File

@ -21,6 +21,7 @@ package org.elasticsearch.action.mlt;
import org.elasticsearch.ElasticsearchGenerationException;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.ValidateActions;
@ -72,6 +73,7 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> {
private int minWordLength = -1;
private int maxWordLength = -1;
private float boostTerms = -1;
private boolean include = false;
private SearchType searchType = SearchType.DEFAULT;
private int searchSize = 0;
@ -313,6 +315,21 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> {
return this.boostTerms;
}
/**
* Whether to include the queried document. Defaults to <tt>false</tt>.
*/
public MoreLikeThisRequest include(boolean include) {
this.include = include;
return this;
}
/**
* Whether to include the queried document. Defaults to <tt>false</tt>.
*/
public boolean include() {
return this.include;
}
void beforeLocalFork() {
if (searchSourceUnsafe) {
searchSource = searchSource.copyBytesArray();
@ -553,6 +570,12 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> {
minWordLength = in.readVInt();
maxWordLength = in.readVInt();
boostTerms = in.readFloat();
if (in.getVersion().onOrAfter(Version.V_1_2_0)) {
include = in.readBoolean();
} else {
include = false; // hard-coded behavior until Elasticsearch 1.2
}
searchType = SearchType.fromId(in.readByte());
if (in.readBoolean()) {
searchQueryHint = in.readString();
@ -622,6 +645,9 @@ public class MoreLikeThisRequest extends ActionRequest<MoreLikeThisRequest> {
out.writeVInt(minWordLength);
out.writeVInt(maxWordLength);
out.writeFloat(boostTerms);
if (out.getVersion().onOrAfter(Version.V_1_2_0)) {
out.writeBoolean(include);
}
out.writeByte(searchType.id());
if (searchQueryHint == null) {

View File

@ -140,6 +140,14 @@ public class MoreLikeThisRequestBuilder extends ActionRequestBuilder<MoreLikeThi
return this;
}
/**
* Whether to include the queried document. Defaults to <tt>false</tt>.
*/
public MoreLikeThisRequestBuilder setInclude(boolean include) {
request.include(include);
return this;
}
/**
* An optional search source request allowing to control the search request for the
* more like this documents.

View File

@ -178,9 +178,11 @@ public class TransportMoreLikeThisAction extends TransportAction<MoreLikeThisReq
}
// exclude myself
if (!request.include()) {
Term uidTerm = docMapper.uidMapper().term(request.type(), request.id());
boolBuilder.mustNot(termQuery(uidTerm.field(), uidTerm.text()));
boolBuilder.adjustPureNegative(false);
}
} catch (Throwable e) {
listener.onFailure(e);
return;

View File

@ -70,6 +70,7 @@ public class RestMoreLikeThisAction extends BaseRestHandler {
mltRequest.minWordLength(request.paramAsInt("min_word_len", request.paramAsInt("min_word_length", -1)));
mltRequest.maxWordLength(request.paramAsInt("max_word_len", request.paramAsInt("max_word_length", -1)));
mltRequest.boostTerms(request.paramAsFloat("boost_terms", -1));
mltRequest.include(request.paramAsBoolean("include", false));
mltRequest.searchType(SearchType.fromString(request.param("search_type")));
mltRequest.searchIndices(request.paramAsStringArray("search_indices", null));

View File

@ -252,4 +252,40 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
assertHitCount(searchResponse, 0l);
}
@Test
public void testSimpleMoreLikeInclude() throws Exception {
logger.info("Creating index test");
assertAcked(prepareCreate("test").addMapping("type1",
jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("text").field("type", "string").endObject()
.endObject().endObject().endObject()));
logger.info("Running Cluster Health");
assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));
logger.info("Indexing...");
client().index(indexRequest("test").type("type1").id("1").source(
jsonBuilder().startObject()
.field("text", "Apache Lucene is a free/open source information retrieval software library").endObject()))
.actionGet();
client().index(indexRequest("test").type("type1").id("2").source(
jsonBuilder().startObject()
.field("text", "Lucene has been ported to other programming languages").endObject()))
.actionGet();
client().admin().indices().refresh(refreshRequest()).actionGet();
logger.info("Running More Like This with include true");
SearchResponse mltResponse = client().moreLikeThis(
moreLikeThisRequest("test").type("type1").id("1").minTermFreq(1).minDocFreq(1).include(true)).actionGet();
assertOrderedSearchHits(mltResponse, "1", "2");
mltResponse = client().moreLikeThis(
moreLikeThisRequest("test").type("type1").id("2").minTermFreq(1).minDocFreq(1).include(true)).actionGet();
assertOrderedSearchHits(mltResponse, "2", "1");
logger.info("Running More Like This with include false");
mltResponse = client().moreLikeThis(moreLikeThisRequest("test").type("type1").id("1").minTermFreq(1).minDocFreq(1)).actionGet();
assertSearchHits(mltResponse, "2");
}
}