MLT Query: Fix exclude with artificial documents
Artificial documents get assigned a random id. When include is set to false (the default), the ids of these documents were also added to the exclusion filter, when they should instead be ignored. Closes #8679
This commit is contained in:
parent
fe762c0eb5
commit
d7338ffdbc
|
@ -27,6 +27,7 @@ import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.ConstantScoreQuery;
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||||
import org.elasticsearch.action.termvectors.MultiTermVectorsRequest;
|
import org.elasticsearch.action.termvectors.MultiTermVectorsRequest;
|
||||||
import org.elasticsearch.action.termvectors.TermVectorsRequest;
|
import org.elasticsearch.action.termvectors.TermVectorsRequest;
|
||||||
|
@ -37,7 +38,6 @@ import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
|
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
import org.elasticsearch.index.analysis.Analysis;
|
import org.elasticsearch.index.analysis.Analysis;
|
||||||
import org.elasticsearch.index.mapper.Uid;
|
|
||||||
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
|
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
|
||||||
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
|
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
|
||||||
|
|
||||||
|
@ -47,6 +47,8 @@ import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
import static org.elasticsearch.index.mapper.Uid.createUidAsBytes;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@ -257,9 +259,7 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||||
boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
|
boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
|
||||||
// exclude the items from the search
|
// exclude the items from the search
|
||||||
if (!include) {
|
if (!include) {
|
||||||
TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, Uid.createUids(items.getRequests()));
|
handleExclude(boolQuery, items);
|
||||||
ConstantScoreQuery query = new ConstantScoreQuery(filter);
|
|
||||||
boolQuery.add(query, BooleanClause.Occur.MUST_NOT);
|
|
||||||
}
|
}
|
||||||
return boolQuery;
|
return boolQuery;
|
||||||
}
|
}
|
||||||
|
@ -305,4 +305,20 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||||
}
|
}
|
||||||
return moreLikeFields;
|
return moreLikeFields;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void handleExclude(BooleanQuery boolQuery, MultiTermVectorsRequest likeItems) {
|
||||||
|
// artificial docs get assigned a random id and should be disregarded
|
||||||
|
List<BytesRef> uids = new ArrayList<>();
|
||||||
|
for (TermVectorsRequest item : likeItems) {
|
||||||
|
if (item.doc() != null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
uids.add(createUidAsBytes(item.type(), item.id()));
|
||||||
|
}
|
||||||
|
if (!uids.isEmpty()) {
|
||||||
|
TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, uids.toArray(new BytesRef[0]));
|
||||||
|
ConstantScoreQuery query = new ConstantScoreQuery(filter);
|
||||||
|
boolQuery.add(query, BooleanClause.Occur.MUST_NOT);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -20,7 +20,6 @@
|
||||||
package org.elasticsearch.mlt;
|
package org.elasticsearch.mlt;
|
||||||
|
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
|
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
|
||||||
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
|
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
|
||||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||||
|
@ -569,17 +568,10 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@LuceneTestCase.AwaitsFix(bugUrl = "alex k working on it")
|
|
||||||
public void testMoreLikeThisArtificialDocs() throws Exception {
|
public void testMoreLikeThisArtificialDocs() throws Exception {
|
||||||
int numFields = randomIntBetween(5, 10);
|
int numFields = randomIntBetween(5, 10);
|
||||||
|
|
||||||
logger.info("Creating an index with multiple fields ...");
|
createIndex("test");
|
||||||
XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties");
|
|
||||||
for (int i = 0; i < numFields; i++) {
|
|
||||||
mapping.startObject("field"+i).field("type", "string").endObject();
|
|
||||||
}
|
|
||||||
mapping.endObject().endObject().endObject();
|
|
||||||
assertAcked(prepareCreate("test").addMapping("type1", mapping).get());
|
|
||||||
ensureGreen();
|
ensureGreen();
|
||||||
|
|
||||||
logger.info("Indexing a single document ...");
|
logger.info("Indexing a single document ...");
|
||||||
|
@ -588,17 +580,15 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
|
||||||
doc.field("field"+i, generateRandomStringArray(5, 10));
|
doc.field("field"+i, generateRandomStringArray(5, 10));
|
||||||
}
|
}
|
||||||
doc.endObject();
|
doc.endObject();
|
||||||
List<IndexRequestBuilder> builders = new ArrayList<>();
|
indexRandom(true, client().prepareIndex("test", "type1", "0").setSource(doc));
|
||||||
builders.add(client().prepareIndex("test", "type1", "1").setSource(doc));
|
|
||||||
indexRandom(true, builders);
|
|
||||||
|
|
||||||
logger.info("Checking the document matches ...");
|
logger.info("Checking the document matches ...");
|
||||||
MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery()
|
MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery()
|
||||||
.docs((Item) new Item().doc(doc).index("test").type("type1"))
|
.like((Item) new Item().doc(doc).index("test").type("type1"))
|
||||||
.minTermFreq(0)
|
.minTermFreq(0)
|
||||||
.minDocFreq(0)
|
.minDocFreq(0)
|
||||||
.maxQueryTerms(100)
|
.maxQueryTerms(100)
|
||||||
.percentTermsToMatch(1); // strict all terms must match!
|
.minimumShouldMatch("100%"); // strict all terms must match!
|
||||||
SearchResponse response = client().prepareSearch("test").setTypes("type1")
|
SearchResponse response = client().prepareSearch("test").setTypes("type1")
|
||||||
.setQuery(mltQuery).get();
|
.setQuery(mltQuery).get();
|
||||||
assertSearchResponse(response);
|
assertSearchResponse(response);
|
||||||
|
|
Loading…
Reference in New Issue