More Like This Query: creates only one MLT query per field for all queried items.
Previously, one MLT query per field was created for each item. One issue with this approach is that the maximum number of selected terms was equal to the number of items times 'max_query_terms'. Instead, users should have direct control over the maximum number of selected terms, regardless of the number of queried items. Another issue with the previous approach is that it could lead to the selection of rather uninteresting terms, simply because they were found in one particular queried item. Instead, the new procedure enforces the selection of interesting terms across ALL items, not within each item. This could lead to search results where the best matching items share commonalities among the best characteristics of all the items. Closes #6404
This commit is contained in:
parent
c41e63c2f9
commit
35cba50fce
|
@ -27,11 +27,13 @@ import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||||
|
import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.io.FastStringReader;
|
import org.elasticsearch.common.io.FastStringReader;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -174,13 +176,17 @@ public class MoreLikeThisQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLikeText(String likeText) {
|
public void setLikeText(String likeText) {
|
||||||
this.likeText = new String[]{likeText};
|
setLikeText(new String[]{likeText});
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLikeText(String... likeText) {
|
public void setLikeText(String... likeText) {
|
||||||
this.likeText = likeText;
|
this.likeText = likeText;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setLikeText(List<String> likeText) {
|
||||||
|
setLikeText(likeText.toArray(Strings.EMPTY_ARRAY));
|
||||||
|
}
|
||||||
|
|
||||||
public String[] getMoreLikeFields() {
|
public String[] getMoreLikeFields() {
|
||||||
return moreLikeFields;
|
return moreLikeFields;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
package org.elasticsearch.index.query;
|
package org.elasticsearch.index.query;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.ObjectArrays;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.queries.TermsFilter;
|
import org.apache.lucene.queries.TermsFilter;
|
||||||
|
@ -207,9 +208,11 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||||
}
|
}
|
||||||
// fetching the items with multi-get
|
// fetching the items with multi-get
|
||||||
List<LikeText> likeTexts = fetchService.fetch(items);
|
List<LikeText> likeTexts = fetchService.fetch(items);
|
||||||
|
// collapse the text onto the same field name
|
||||||
|
Collection<LikeText> likeTextsCollapsed = collapseTextOnField(likeTexts);
|
||||||
// right now we are just building a boolean query
|
// right now we are just building a boolean query
|
||||||
BooleanQuery boolQuery = new BooleanQuery();
|
BooleanQuery boolQuery = new BooleanQuery();
|
||||||
for (LikeText likeText : likeTexts) {
|
for (LikeText likeText : likeTextsCollapsed) {
|
||||||
addMoreLikeThis(boolQuery, mltQuery, likeText);
|
addMoreLikeThis(boolQuery, mltQuery, likeText);
|
||||||
}
|
}
|
||||||
// exclude the items from the search
|
// exclude the items from the search
|
||||||
|
@ -260,6 +263,19 @@ public class MoreLikeThisQueryParser implements QueryParser {
|
||||||
return moreLikeFields;
|
return moreLikeFields;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Collection<LikeText> collapseTextOnField (Collection<LikeText> likeTexts) {
|
||||||
|
Map<String, LikeText> collapsedTexts = new HashMap<>();
|
||||||
|
for (LikeText likeText : likeTexts) {
|
||||||
|
String field = likeText.field;
|
||||||
|
String[] text = likeText.text;
|
||||||
|
if (collapsedTexts.containsKey(field)) {
|
||||||
|
text = ObjectArrays.concat(collapsedTexts.get(field).text, text, String.class);
|
||||||
|
}
|
||||||
|
collapsedTexts.put(field, new LikeText(field, text));
|
||||||
|
}
|
||||||
|
return collapsedTexts.values();
|
||||||
|
}
|
||||||
|
|
||||||
private void removeUnsupportedFields(MultiGetRequest.Item item, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
|
private void removeUnsupportedFields(MultiGetRequest.Item item, Analyzer analyzer, boolean failOnUnsupportedField) throws IOException {
|
||||||
item.fields((String[]) removeUnsupportedFields(Arrays.asList(item.fields()), analyzer, failOnUnsupportedField).toArray());
|
item.fields((String[]) removeUnsupportedFields(Arrays.asList(item.fields()), analyzer, failOnUnsupportedField).toArray());
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.action.get.MultiGetRequest;
|
import org.elasticsearch.action.get.MultiGetRequest;
|
||||||
import org.elasticsearch.cache.recycler.CacheRecyclerModule;
|
import org.elasticsearch.cache.recycler.CacheRecyclerModule;
|
||||||
import org.elasticsearch.cluster.ClusterService;
|
import org.elasticsearch.cluster.ClusterService;
|
||||||
|
import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.bytes.BytesArray;
|
import org.elasticsearch.common.bytes.BytesArray;
|
||||||
import org.elasticsearch.common.compress.CompressedString;
|
import org.elasticsearch.common.compress.CompressedString;
|
||||||
import org.elasticsearch.common.inject.AbstractModule;
|
import org.elasticsearch.common.inject.AbstractModule;
|
||||||
|
@ -65,6 +66,7 @@ import org.elasticsearch.index.search.geo.GeoDistanceFilter;
|
||||||
import org.elasticsearch.index.search.geo.GeoPolygonFilter;
|
import org.elasticsearch.index.search.geo.GeoPolygonFilter;
|
||||||
import org.elasticsearch.index.search.geo.InMemoryGeoBoundingBoxFilter;
|
import org.elasticsearch.index.search.geo.InMemoryGeoBoundingBoxFilter;
|
||||||
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
|
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService;
|
||||||
|
import org.elasticsearch.index.search.morelikethis.MoreLikeThisFetchService.LikeText;
|
||||||
import org.elasticsearch.index.settings.IndexSettingsModule;
|
import org.elasticsearch.index.settings.IndexSettingsModule;
|
||||||
import org.elasticsearch.index.similarity.SimilarityModule;
|
import org.elasticsearch.index.similarity.SimilarityModule;
|
||||||
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
|
import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService;
|
||||||
|
@ -1680,19 +1682,14 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase {
|
||||||
MoreLikeThisQueryParser parser = (MoreLikeThisQueryParser) queryParser.queryParser("more_like_this");
|
MoreLikeThisQueryParser parser = (MoreLikeThisQueryParser) queryParser.queryParser("more_like_this");
|
||||||
parser.setFetchService(new MockMoreLikeThisFetchService());
|
parser.setFetchService(new MockMoreLikeThisFetchService());
|
||||||
|
|
||||||
List<MoreLikeThisFetchService.LikeText> likeTexts = new ArrayList<>();
|
List<LikeText> likeTexts = new ArrayList<>();
|
||||||
String index = "test";
|
likeTexts.add(new LikeText("name.first", new String[]{
|
||||||
String type = "person";
|
"test person 1 name.first", "test person 2 name.first", "test person 3 name.first", "test person 4 name.first"}));
|
||||||
for (int i = 1; i < 5; i++) {
|
likeTexts.add(new LikeText("name.last", new String[]{
|
||||||
for (String field : new String[]{"name.first", "name.last"}) {
|
"test person 1 name.last", "test person 2 name.last", "test person 3 name.last", "test person 4 name.last"}));
|
||||||
MoreLikeThisFetchService.LikeText likeText = new MoreLikeThisFetchService.LikeText(
|
|
||||||
field, index + " " + type + " " + i + " " + field);
|
|
||||||
likeTexts.add(likeText);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
IndexQueryParserService queryParser = queryParser();
|
IndexQueryParserService queryParser = queryParser();
|
||||||
String query = copyToStringFromClasspath("/org/elasticsearch/index/query/mlt-ids.json");
|
String query = copyToStringFromClasspath("/org/elasticsearch/index/query/mlt-items.json");
|
||||||
Query parsedQuery = queryParser.parse(query).query();
|
Query parsedQuery = queryParser.parse(query).query();
|
||||||
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
|
assertThat(parsedQuery, instanceOf(BooleanQuery.class));
|
||||||
BooleanQuery booleanQuery = (BooleanQuery) parsedQuery;
|
BooleanQuery booleanQuery = (BooleanQuery) parsedQuery;
|
||||||
|
@ -1700,7 +1697,7 @@ public class SimpleIndexQueryParserTests extends ElasticsearchTestCase {
|
||||||
|
|
||||||
// check each clause is for each item
|
// check each clause is for each item
|
||||||
BooleanClause[] boolClauses = booleanQuery.getClauses();
|
BooleanClause[] boolClauses = booleanQuery.getClauses();
|
||||||
for (int i=0; i<likeTexts.size(); i++) {
|
for (int i = 0; i < likeTexts.size(); i++) {
|
||||||
BooleanClause booleanClause = booleanQuery.getClauses()[i];
|
BooleanClause booleanClause = booleanQuery.getClauses()[i];
|
||||||
assertThat(booleanClause.getOccur(), is(BooleanClause.Occur.SHOULD));
|
assertThat(booleanClause.getOccur(), is(BooleanClause.Occur.SHOULD));
|
||||||
assertThat(booleanClause.getQuery(), instanceOf(MoreLikeThisQuery.class));
|
assertThat(booleanClause.getQuery(), instanceOf(MoreLikeThisQuery.class));
|
||||||
|
|
Loading…
Reference in New Issue