More Like This Query: creates only one MLT query per field for all queried items.
Previously, one MLT query per field was created for each item. One issue with this approach is that the maximum number of selected terms was equal to the number of items times 'max_query_terms'. Instead, users should have direct control over the maximum number of selected terms, regardless of the number of queried items. Another issue with the previous approach is that it could select rather uninteresting terms merely because they were found in one particular queried item. The new procedure instead selects interesting terms across ALL items rather than within each item. This can lead to search results in which the best matching documents share the most characteristic terms of all the queried items taken together. Closes #6404
This commit is contained in:
parent
c41e63c2f9
commit
35cba50fce
|
@ -27,11 +27,13 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.io.FastStringReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
|
@ -174,13 +176,17 @@ public class MoreLikeThisQuery extends Query {
|
|||
}
|
||||
|
||||
public void setLikeText(String likeText) {
|
||||
this.likeText = new String[]{likeText};
|
||||
setLikeText(new String[]{likeText});
|
||||
}
|
||||
|
||||
public void setLikeText(String... likeText) {
|
||||
this.likeText = likeText;
|
||||
}
|
||||
|
||||
public void setLikeText(List<String> likeText) {
|
||||
setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
|
||||
}
|
||||
|
||||
public String[] getMoreLikeFields() {
|
||||
return moreLikeFields;
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.elasticsearch.index.query;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.ObjectArrays;
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queries.TermsFilter;
|
||||
|
@ -207,9 +208,11 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
}
|
||||
// fetching the items with multi-get
|
||||
List<LikeText> likeTexts = fetchService.fetch(items);
|
||||
// collapse the text onto the same field name
|
||||
Collection<LikeText> likeTextsCollapsed = collapseTextOnField(likeTexts);
|
||||
// right now we are just building a boolean query
|
||||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
for (LikeText likeText : likeTexts) {
|
||||
for (LikeText likeText : likeTextsCollapsed) {
|
||||
addMoreLikeThis(boolQuery, mltQuery, likeText);
|
||||
}
|
||||
// exclude the items from the search
|
||||
|
@ -260,6 +263,19 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
|||
return moreLikeFields;
|
||||
}
|
||||
|
||||
public static Collection<LikeText> collapseTextOnField (Collection<LikeText> likeTexts) {
|
||||
Map<String, LikeText> collapsedTexts = new HashMap<>();
|
||||
for (LikeText likeText : likeTexts) {
|
||||
String field = likeText.field;
|
||||
String[] text = likeText.text;
|
||||
if (collapsedTexts.containsKey(field)) {
|
||||
text = ObjectArrays.concat(collapsedTexts.get(field).text, text, String.class);
|
||||
}
|
||||
collapsedTexts.put(field, new LikeText(field, text));
|
||||
}
|
||||
return collapsedTexts.values();
|
||||
}
|
||||
|
||||
private void removeUnsupportedFields(MultiGetRequest.Item item, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
|
||||
item.fields((String[]) removeUnsupportedFields(Arrays.asList(item.fields()), analyzer, failOnUnsupportedField).toArray());
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.elasticsearch.ElasticsearchException;
|
|||
import org.elasticsearch.action.get.MultiGetRequest;
|
||||
import org.elasticsearch.cache.recycler.CacheRecyclerModule;
|
||||
import org.elasticsearch.cluster.ClusterService;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
import org.elasticsearch.common.compress.CompressedString;
|
||||
import org.elasticsearch.common.inject.AbstractModule;
|
||||
|
@ -65,6 +66,7 @@ import org.elasticsearch.index.search.geo.GeoDistanceFilter;
|
|||
import org.elasticsearch.index.search.geo.GeoPolygonFilter;
|
||||
import org.elasticsearch.index.search.geo.InMemoryGeoBoundingBoxFilter;
|
||||
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
|
||||
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService.LikeText;
|
||||
import org.elasticsearch.index.settings.IndexSettingsModule;
|
||||
import org.elasticsearch.index.similarity.SimilarityModule;
|
||||
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
|
||||
|
@ -1680,19 +1682,14 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase {
|
|||
MoreLikeThisQueryParser parser = (MoreLikeThisQueryParser) queryParser.queryParser("more_like_this");
|
||||
parser.setFetchService(new MockMoreLikeThisFetchService());
|
||||
|
||||
List<MoreLikeThisFetchService.LikeText> likeTexts = new ArrayList<>();
|
||||
String index = "test";
|
||||
String type = "person";
|
||||
for (int i = 1; i < 5; i++) {
|
||||
for (String field : new String[]{"name.first", "name.last"}) {
|
||||
MoreLikeThisFetchService.LikeText likeText = new MoreLikeThisFetchService.LikeText(
|
||||
field, index + " " + type + " " + i + " " + field);
|
||||
likeTexts.add(likeText);
|
||||
}
|
||||
}
|
||||
List<LikeText> likeTexts = new ArrayList<>();
|
||||
likeTexts.add(new LikeText("name.first", new String[]{
|
||||
"test person 1 name.first", "test person 2 name.first", "test person 3 name.first", "test person 4 name.first"}));
|
||||
likeTexts.add(new LikeText("name.last", new String[]{
|
||||
"test person 1 name.last", "test person 2 name.last", "test person 3 name.last", "test person 4 name.last"}));
|
||||
|
||||
IndexQueryParserService queryParser = queryParser();
|
||||
String query = copyToStringFromClasspath("/org/elasticsearch/index/query/mlt-ids.json");
|
||||
String query = copyToStringFromClasspath("/org/elasticsearch/index/query/mlt-items.json");
|
||||
Query parsedQuery = queryParser.parse(query).query();
|
||||
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
|
||||
BooleanQuery booleanQuery = (BooleanQuery) parsedQuery;
|
||||
|
@ -1700,7 +1697,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase {
|
|||
|
||||
// check each clause is for each item
|
||||
BooleanClause[] boolClauses = booleanQuery.getClauses();
|
||||
for (int i=0; i<likeTexts.size(); i++) {
|
||||
for (int i = 0; i < likeTexts.size(); i++) {
|
||||
BooleanClause booleanClause = booleanQuery.getClauses()[i];
|
||||
assertThat(booleanClause.getOccur(), is(BooleanClause.Occur.SHOULD));
|
||||
assertThat(booleanClause.getQuery(), instanceOf(MoreLikeThisQuery.class));
|
||||
|
|
Loading…
Reference in New Issue