mirror of https://github.com/apache/lucene.git
SOLR-7143: MoreLikeThis Query parser now handles multiple field names
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1689531 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
de18625659
commit
0325ef2ae4
|
@ -753,9 +753,10 @@ public final class MoreLikeThis {
|
|||
IOException {
|
||||
HashMap<String,Int> termFreqMap = new HashMap();
|
||||
for (String fieldName : fieldNames) {
|
||||
|
||||
for (String field : fields.keySet()) {
|
||||
Collection<Object> fieldValues = fields.get(field);
|
||||
if(fieldValues == null)
|
||||
continue;
|
||||
for(Object fieldValue:fieldValues) {
|
||||
if (fieldValue != null) {
|
||||
addTermFrequencies(new StringReader(String.valueOf(fieldValue)), termFreqMap,
|
||||
|
|
|
@ -209,6 +209,9 @@ Bug Fixes
|
|||
* SOLR-7741: Add missing fields to SolrIndexerConfig.toMap
|
||||
(Mike Drob, Christine Poerschke via Ramkumar Aiyengar)
|
||||
|
||||
* SOLR-7143: MoreLikeThis Query parser should handle multiple field names
|
||||
(Jens Wille, Anshum Gupta)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ import org.apache.lucene.queries.mlt.MoreLikeThis;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.StringUtils;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -38,8 +39,11 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class CloudMLTQParser extends QParser {
|
||||
// Pattern is thread safe -- TODO? share this with general 'fl' param
|
||||
private static final Pattern splitList = Pattern.compile(",| ");
|
||||
|
||||
public CloudMLTQParser(String qstr, SolrParams localParams,
|
||||
SolrParams params, SolrQueryRequest req) {
|
||||
|
@ -86,14 +90,21 @@ public class CloudMLTQParser extends QParser {
|
|||
String[] qf = localParams.getParams("qf");
|
||||
Map<String, Collection<Object>> filteredDocument = new HashMap();
|
||||
|
||||
ArrayList<String> fieldNames = new ArrayList();
|
||||
|
||||
if (qf != null) {
|
||||
mlt.setFieldNames(qf);
|
||||
for (String field : qf) {
|
||||
filteredDocument.put(field, doc.getFieldValues(field));
|
||||
for (String fieldName : qf) {
|
||||
if (!StringUtils.isEmpty(fieldName)) {
|
||||
String[] strings = splitList.split(fieldName);
|
||||
for (String string : strings) {
|
||||
if (!StringUtils.isEmpty(string)) {
|
||||
fieldNames.add(string);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Map<String, SchemaField> fields = req.getSchema().getFields();
|
||||
ArrayList<String> fieldNames = new ArrayList();
|
||||
for (String field : doc.getFieldNames()) {
|
||||
// Only use fields that are stored and have an explicit analyzer.
|
||||
// This makes sense as the query uses tf/idf/.. for query construction.
|
||||
|
@ -101,10 +112,18 @@ public class CloudMLTQParser extends QParser {
|
|||
if(fields.get(field).stored()
|
||||
&& fields.get(field).getType().isExplicitAnalyzer()) {
|
||||
fieldNames.add(field);
|
||||
filteredDocument.put(field, doc.getFieldValues(field));
|
||||
}
|
||||
}
|
||||
mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
|
||||
}
|
||||
|
||||
if( fieldNames.size() < 1 ) {
|
||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
|
||||
"MoreLikeThis requires at least one similarity field: qf" );
|
||||
}
|
||||
|
||||
mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
|
||||
for (String field : fieldNames) {
|
||||
filteredDocument.put(field, doc.getFieldValues(field));
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.StringUtils;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
|
@ -35,8 +36,11 @@ import org.apache.solr.search.SolrIndexSearcher;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class SimpleMLTQParser extends QParser {
|
||||
// Pattern is thread safe -- TODO? share this with general 'fl' param
|
||||
private static final Pattern splitList = Pattern.compile(",| ");
|
||||
|
||||
public SimpleMLTQParser(String qstr, SolrParams localParams,
|
||||
SolrParams params, SolrQueryRequest req) {
|
||||
|
@ -85,18 +89,30 @@ public class SimpleMLTQParser extends QParser {
|
|||
ArrayList<String> fields = new ArrayList();
|
||||
|
||||
if (qf != null) {
|
||||
mlt.setFieldNames(qf);
|
||||
for (String fieldName : qf) {
|
||||
if (!StringUtils.isEmpty(fieldName)) {
|
||||
String[] strings = splitList.split(fieldName);
|
||||
for (String string : strings) {
|
||||
if (!StringUtils.isEmpty(string)) {
|
||||
fields.add(string);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields();
|
||||
for (String fieldName : fieldNames.keySet()) {
|
||||
if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored())
|
||||
if (fieldNames.get(fieldName).getType().getNumericType() == null)
|
||||
fields.add(fieldName);
|
||||
}
|
||||
mlt.setFieldNames(fields.toArray(new String[fields.size()]));
|
||||
}
|
||||
if( fields.size() < 1 ) {
|
||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
|
||||
"MoreLikeThis requires at least one similarity field: qf" );
|
||||
}
|
||||
|
||||
mlt.setFieldNames(fields.toArray(new String[fields.size()]));
|
||||
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
|
||||
|
||||
return mlt.like(scoreDocs[0].doc);
|
||||
|
|
|
@ -59,6 +59,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
String id = "id";
|
||||
delQ("*:*");
|
||||
String FIELD1 = "lowerfilt" ;
|
||||
String FIELD2 = "lowerfilt1" ;
|
||||
|
||||
indexDoc(sdoc(id, "1", FIELD1, "toyota"));
|
||||
indexDoc(sdoc(id, "2", FIELD1, "chevrolet"));
|
||||
|
@ -87,6 +88,14 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
indexDoc(sdoc(id, "26", FIELD1, "bmw usa 328i"));
|
||||
indexDoc(sdoc(id, "27", FIELD1, "bmw usa 535i"));
|
||||
indexDoc(sdoc(id, "28", FIELD1, "bmw 750Li"));
|
||||
indexDoc(sdoc(id, "29", FIELD1, "bmw usa",
|
||||
FIELD2, "red green blue"));
|
||||
indexDoc(sdoc(id, "30", FIELD1, "The quote red fox jumped over the lazy brown dogs.",
|
||||
FIELD2, "red green yellow"));
|
||||
indexDoc(sdoc(id, "31", FIELD1, "The fat red fox jumped over the lazy brown dogs.",
|
||||
FIELD2, "green blue yellow"));
|
||||
indexDoc(sdoc(id, "32", FIELD1, "The slim red fox jumped over the lazy brown dogs.",
|
||||
FIELD2, "yellow white black"));
|
||||
|
||||
commit();
|
||||
|
||||
|
@ -100,7 +109,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17");
|
||||
QueryResponse queryResponse = cloudClient.query(params);
|
||||
SolrDocumentList solrDocuments = queryResponse.getResults();
|
||||
int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 9};
|
||||
int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 32};
|
||||
int[] actualIds = new int[10];
|
||||
int i = 0;
|
||||
for (SolrDocument solrDocument : solrDocuments) {
|
||||
|
@ -113,7 +122,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
params.set(CommonParams.DEBUG, "true");
|
||||
queryResponse = queryServer(params);
|
||||
solrDocuments = queryResponse.getResults();
|
||||
expectedIds = new int[]{3, 27, 26, 28};
|
||||
expectedIds = new int[]{3, 29, 27, 26, 28};
|
||||
actualIds = new int[solrDocuments.size()];
|
||||
i = 0;
|
||||
for (SolrDocument solrDocument : solrDocuments) {
|
||||
|
@ -139,6 +148,36 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
actualParsedQueries.get(counter)));
|
||||
}
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1 mindf=0 mintf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
queryResponse = queryServer(params);
|
||||
solrDocuments = queryResponse.getResults();
|
||||
expectedIds = new int[]{26, 27, 3, 29, 28};
|
||||
actualIds = new int[solrDocuments.size()];
|
||||
i = 0;
|
||||
for (SolrDocument solrDocument : solrDocuments) {
|
||||
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
|
||||
}
|
||||
|
||||
assertArrayEquals(expectedIds, actualIds);
|
||||
|
||||
expectedQueryString = "lowerfilt:bmw lowerfilt:usa lowerfilt:328i";
|
||||
|
||||
if(queryResponse.getDebugMap().get("parsedquery") instanceof String) {
|
||||
actualParsedQueries = new ArrayList();
|
||||
actualParsedQueries.add((String) queryResponse.getDebugMap().get("parsedquery"));
|
||||
} else {
|
||||
actualParsedQueries = (ArrayList<String>) queryResponse
|
||||
.getDebugMap().get("parsedquery");
|
||||
}
|
||||
|
||||
for (int counter = 0; counter < actualParsedQueries.size(); counter++) {
|
||||
assertTrue("Parsed queries aren't equal",
|
||||
compareParsedQueryStrings(expectedQueryString,
|
||||
actualParsedQueries.get(counter)));
|
||||
}
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
// Test out a high value of df and make sure nothing matches.
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=20 mintf=1}3");
|
||||
|
@ -161,7 +200,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
params.set(CommonParams.DEBUG, "true");
|
||||
queryResponse = queryServer(params);
|
||||
solrDocuments = queryResponse.getResults();
|
||||
assertEquals("Expected to match 4 documents with a minwl of 3 but found more", solrDocuments.size(), 4);
|
||||
assertEquals("Expected to match 4 documents with a minwl of 3 but found more", 5, solrDocuments.size());
|
||||
|
||||
// Assert that {!mlt}id does not throw an exception i.e. implicitly, only fields that are stored + have explicit
|
||||
// analyzer are used for MLT Query construction.
|
||||
|
@ -171,10 +210,12 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
queryResponse = queryServer(params);
|
||||
solrDocuments = queryResponse.getResults();
|
||||
actualIds = new int[solrDocuments.size()];
|
||||
expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 18, 19, 21};
|
||||
expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 32, 18, 19};
|
||||
i = 0;
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (SolrDocument solrDocument : solrDocuments) {
|
||||
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
|
||||
sb.append(actualIds[i-1]).append(", ");
|
||||
}
|
||||
assertArrayEquals(expectedIds, actualIds);
|
||||
}
|
||||
|
|
|
@ -17,9 +17,13 @@ package org.apache.solr.search.mlt;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -34,38 +38,50 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void doTest() throws Exception {
|
||||
String id = "id";
|
||||
String FIELD1 = "lowerfilt" ;
|
||||
String FIELD2 = "lowerfilt1" ;
|
||||
delQ("*:*");
|
||||
assertU(adoc(id, "1", "lowerfilt", "toyota"));
|
||||
assertU(adoc(id, "2", "lowerfilt", "chevrolet"));
|
||||
assertU(adoc(id, "3", "lowerfilt", "suzuki"));
|
||||
assertU(adoc(id, "4", "lowerfilt", "ford"));
|
||||
assertU(adoc(id, "5", "lowerfilt", "ferrari"));
|
||||
assertU(adoc(id, "6", "lowerfilt", "jaguar"));
|
||||
assertU(adoc(id, "7", "lowerfilt", "mclaren moon or the moon and moon moon shine " +
|
||||
assertU(adoc(id, "1", FIELD1, "toyota"));
|
||||
assertU(adoc(id, "2", FIELD1, "chevrolet"));
|
||||
assertU(adoc(id, "3", FIELD1, "suzuki"));
|
||||
assertU(adoc(id, "4", FIELD1, "ford"));
|
||||
assertU(adoc(id, "5", FIELD1, "ferrari"));
|
||||
assertU(adoc(id, "6", FIELD1, "jaguar"));
|
||||
assertU(adoc(id, "7", FIELD1, "mclaren moon or the moon and moon moon shine " +
|
||||
"and the moon but moon was good foxes too"));
|
||||
assertU(adoc(id, "8", "lowerfilt", "sonata"));
|
||||
assertU(adoc(id, "9", "lowerfilt", "The quick red fox jumped over the lazy big " +
|
||||
assertU(adoc(id, "8", FIELD1, "sonata"));
|
||||
assertU(adoc(id, "9", FIELD1, "The quick red fox jumped over the lazy big " +
|
||||
"and large brown dogs."));
|
||||
assertU(adoc(id, "10", "lowerfilt", "blue"));
|
||||
assertU(adoc(id, "12", "lowerfilt", "glue"));
|
||||
assertU(adoc(id, "13", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "14", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "15", "lowerfilt", "The fat red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "16", "lowerfilt", "The slim red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "17", "lowerfilt", "The quote red fox jumped moon over the lazy " +
|
||||
assertU(adoc(id, "10", FIELD1, "blue"));
|
||||
assertU(adoc(id, "12", FIELD1, "glue"));
|
||||
assertU(adoc(id, "13", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "14", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "15", FIELD1, "The fat red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "16", FIELD1, "The slim red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "17", FIELD1, "The quote red fox jumped moon over the lazy " +
|
||||
"brown dogs moon. Of course moon. Foxes and moon come back to the foxes and moon"));
|
||||
assertU(adoc(id, "18", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "19", "lowerfilt", "The hose red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "20", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "21", "lowerfilt", "The court red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "22", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "23", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "24", "lowerfilt", "The file red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "25", "lowerfilt", "rod fix"));
|
||||
assertU(adoc(id, "18", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "19", FIELD1, "The hose red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "20", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "21", FIELD1, "The court red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "22", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "23", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "24", FIELD1, "The file red fox jumped over the lazy brown dogs."));
|
||||
assertU(adoc(id, "25", FIELD1, "rod fix"));
|
||||
assertU(adoc(id, "26", FIELD1, "bmw usa 328i"));
|
||||
assertU(adoc(id, "27", FIELD1, "bmw usa 535i"));
|
||||
assertU(adoc(id, "28", FIELD1, "bmw 750Li"));
|
||||
assertU(adoc(id, "29", FIELD1, "bmw usa",
|
||||
FIELD2, "red green blue"));
|
||||
assertU(adoc(id, "30", FIELD1, "The quote red fox jumped over the lazy brown dogs.",
|
||||
FIELD2, "red green yellow"));
|
||||
assertU(adoc(id, "31", FIELD1, "The fat red fox jumped over the lazy brown dogs.",
|
||||
FIELD2, "green blue yellow"));
|
||||
assertU(adoc(id, "32", FIELD1, "The slim red fox jumped over the lazy brown dogs.",
|
||||
FIELD2, "yellow white black"));
|
||||
|
||||
assertU(commit());
|
||||
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17");
|
||||
assertQ(req(params),
|
||||
|
@ -79,7 +95,38 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
|
|||
"//result/doc[8]/int[@name='id'][.='21']",
|
||||
"//result/doc[9]/int[@name='id'][.='22']",
|
||||
"//result/doc[10]/int[@name='id'][.='23']"
|
||||
);
|
||||
);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
assertQ(req(params),
|
||||
"//result/doc[1]/int[@name='id'][.='26']",
|
||||
"//result/doc[2]/int[@name='id'][.='29']",
|
||||
"//result/doc[3]/int[@name='id'][.='27']",
|
||||
"//result/doc[4]/int[@name='id'][.='28']"
|
||||
);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=10 mintf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
assertQ(req(params),
|
||||
"//result[@numFound='0']"
|
||||
);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1 mindf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
assertQ(req(params),
|
||||
"//result[@numFound='4']"
|
||||
);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=4 mintf=1 mindf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
assertQ(req(params),
|
||||
"//result[@numFound='1']"
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue