From d7338ffdbc5e636d777cf6d726663012170d1031 Mon Sep 17 00:00:00 2001 From: Alex Ksikes Date: Thu, 27 Nov 2014 09:47:02 +0100 Subject: [PATCH] MLT Query: Fix exclude with artificial documents Artificial documents get assigned a random id. When include is set to false (default), the ids of these documents also get included, when they should rather be ignored. Closes #8679 --- .../index/query/MoreLikeThisQueryParser.java | 24 +++++++++++++++---- .../mlt/MoreLikeThisActionTests.java | 18 ++++---------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java index ac2799edd0c..78c333d8591 100644 --- a/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MoreLikeThisQueryParser.java @@ -27,6 +27,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.action.termvectors.MultiTermVectorsRequest; import org.elasticsearch.action.termvectors.TermVectorsRequest; @@ -37,7 +38,6 @@ import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.MoreLikeThisQuery; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.Analysis; -import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.mapper.internal.UidFieldMapper; import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService; @@ -47,6 +47,8 @@ import java.util.Iterator; import java.util.List; import java.util.Set; +import static org.elasticsearch.index.mapper.Uid.createUidAsBytes; + /** * */ @@ -257,9 +259,7 @@ public class MoreLikeThisQueryParser implements QueryParser { boolQuery.add(mltQuery, BooleanClause.Occur.SHOULD); // exclude the items from the search if (!include) { - TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, Uid.createUids(items.getRequests())); - ConstantScoreQuery query = new ConstantScoreQuery(filter); - boolQuery.add(query, BooleanClause.Occur.MUST_NOT); + handleExclude(boolQuery, items); } return boolQuery; } @@ -305,4 +305,20 @@ public class MoreLikeThisQueryParser implements QueryParser { } return moreLikeFields; } + + private void handleExclude(BooleanQuery boolQuery, MultiTermVectorsRequest likeItems) { + // artificial docs get assigned a random id and should be disregarded + List uids = new ArrayList<>(); + for (TermVectorsRequest item : likeItems) { + if (item.doc() != null) { + continue; + } + uids.add(createUidAsBytes(item.type(), item.id())); + } + if (!uids.isEmpty()) { + TermsFilter filter = new TermsFilter(UidFieldMapper.NAME, uids.toArray(new BytesRef[0])); + ConstantScoreQuery query = new ConstantScoreQuery(filter); + boolQuery.add(query, BooleanClause.Occur.MUST_NOT); + } + } } \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/mlt/MoreLikeThisActionTests.java b/src/test/java/org/elasticsearch/mlt/MoreLikeThisActionTests.java index 0edafa4ffb2..e6d1da146d2 100644 --- a/src/test/java/org/elasticsearch/mlt/MoreLikeThisActionTests.java +++ b/src/test/java/org/elasticsearch/mlt/MoreLikeThisActionTests.java @@ -20,7 +20,6 @@ package org.elasticsearch.mlt; import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.LuceneTestCase; import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -569,17 +568,10 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest { } @Test - @LuceneTestCase.AwaitsFix(bugUrl = "alex k working on it") public void testMoreLikeThisArtificialDocs() throws Exception { int numFields = randomIntBetween(5, 10); - logger.info("Creating an index with multiple fields ..."); - XContentBuilder mapping = jsonBuilder().startObject().startObject("type1").startObject("properties"); - for (int i = 0; i < numFields; i++) { - mapping.startObject("field"+i).field("type", "string").endObject(); - } - mapping.endObject().endObject().endObject(); - assertAcked(prepareCreate("test").addMapping("type1", mapping).get()); + createIndex("test"); ensureGreen(); logger.info("Indexing a single document ..."); @@ -588,17 +580,15 @@ public class MoreLikeThisActionTests extends ElasticsearchIntegrationTest { doc.field("field"+i, generateRandomStringArray(5, 10)); } doc.endObject(); - List builders = new ArrayList<>(); - builders.add(client().prepareIndex("test", "type1", "1").setSource(doc)); - indexRandom(true, builders); + indexRandom(true, client().prepareIndex("test", "type1", "0").setSource(doc)); logger.info("Checking the document matches ..."); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery() - .docs((Item) new Item().doc(doc).index("test").type("type1")) + .like((Item) new Item().doc(doc).index("test").type("type1")) .minTermFreq(0) .minDocFreq(0) .maxQueryTerms(100) - .percentTermsToMatch(1); // strict all terms must match! + .minimumShouldMatch("100%"); // strict all terms must match! SearchResponse response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response);