MLT Query: Fix exclude with artificial documents
Artificial documents get assigned a random id. When include is set to false (default), the ids of these documents also get included, when they should rather be ignored. Closes #8679
This commit is contained in:
parent
fe762c0eb5
commit
d7338ffdbc
|
@ -27,6 +27,7 @@ import org.apache.lucene.search.BooleanClause;
|
|||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.action.termvectors.MultiTermVectorsRequest;
|
||||
import org.elasticsearch.action.termvectors.TermVectorsRequest;
|
||||
|
@ -37,7 +38,6 @@ import org.elasticsearch.common.inject.Inject;
|
|||
import org.elasticsearch.common.lucene.search.MoreLikeThisQuery;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.analysis.Analysis;
|
||||
import org.elasticsearch.index.mapper.Uid;
|
||||
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
|
||||
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
|
||||
|
||||
|
@ -47,6 +47,8 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.elasticsearch.index.mapper.Uid.createUidAsBytes;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
@ -257,9 +259,7 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD);
|
||||
// exclude the items from the search
|
||||
if (!include) {
|
||||
TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, Uid.createUids(items.getRequests()));
|
||||
ConstantScoreQuery query = new ConstantScoreQuery(filter);
|
||||
boolQuery.add(query, BooleanClause.Occur.MUST_NOT);
|
||||
handleExclude(boolQuery, items);
|
||||
}
|
||||
return boolQuery;
|
||||
}
|
||||
|
@ -305,4 +305,20 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
}
|
||||
return moreLikeFields;
|
||||
}
|
||||
|
||||
private void handleExclude(BooleanQuery boolQuery, MultiTermVectorsRequest likeItems) {
|
||||
// artificial docs get assigned a random id and should be disregarded
|
||||
List<BytesRef> uids = new ArrayList<>();
|
||||
for (TermVectorsRequest item : likeItems) {
|
||||
if (item.doc() != null) {
|
||||
continue;
|
||||
}
|
||||
uids.add(createUidAsBytes(item.type(), item.id()));
|
||||
}
|
||||
if (!uids.isEmpty()) {
|
||||
TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, uids.toArray(new BytesRef[0]));
|
||||
ConstantScoreQuery query = new ConstantScoreQuery(filter);
|
||||
boolQuery.add(query, BooleanClause.Occur.MUST_NOT);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -20,7 +20,6 @@
|
|||
package org.elasticsearch.mlt;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
|
||||
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
|
||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||
|
@ -569,17 +568,10 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
@LuceneTestCase.AwaitsFix(bugUrl = "alex k working on it")
|
||||
public void testMoreLikeThisArtificialDocs() throws Exception {
|
||||
int numFields = randomIntBetween(5, 10);
|
||||
|
||||
logger.info("Creating an index with multiple fields ...");
|
||||
XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties");
|
||||
for (int i = 0; i < numFields; i++) {
|
||||
mapping.startObject("field"+i).field("type", "string").endObject();
|
||||
}
|
||||
mapping.endObject().endObject().endObject();
|
||||
assertAcked(prepareCreate("test").addMapping("type1", mapping).get());
|
||||
createIndex("test");
|
||||
ensureGreen();
|
||||
|
||||
logger.info("Indexing a single document ...");
|
||||
|
@ -588,17 +580,15 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest {
|
|||
doc.field("field"+i, generateRandomStringArray(5, 10));
|
||||
}
|
||||
doc.endObject();
|
||||
List<IndexRequestBuilder> builders = new ArrayList<>();
|
||||
builders.add(client().prepareIndex("test", "type1", "1").setSource(doc));
|
||||
indexRandom(true, builders);
|
||||
indexRandom(true, client().prepareIndex("test", "type1", "0").setSource(doc));
|
||||
|
||||
logger.info("Checking the document matches ...");
|
||||
MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery()
|
||||
.docs((Item) new Item().doc(doc).index("test").type("type1"))
|
||||
.like((Item) new Item().doc(doc).index("test").type("type1"))
|
||||
.minTermFreq(0)
|
||||
.minDocFreq(0)
|
||||
.maxQueryTerms(100)
|
||||
.percentTermsToMatch(1); // strict all terms must match!
|
||||
.minimumShouldMatch("100%"); // strict all terms must match!
|
||||
SearchResponse response = client().prepareSearch("test").setTypes("type1")
|
||||
.setQuery(mltQuery).get();
|
||||
assertSearchResponse(response);
|
||||
|
|
Loading…
Reference in New Issue