diff --git a/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java b/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java index 32df06f63eb..115161b6c93 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java @@ -753,9 +753,10 @@ public final class MoreLikeThis { IOException { HashMap termFreqMap = new HashMap(); for (String fieldName : fieldNames) { - for (String field : fields.keySet()) { Collection fieldValues = fields.get(field); + if(fieldValues == null) + continue; for(Object fieldValue:fieldValues) { if (fieldValue != null) { addTermFrequencies(new StringReader(String.valueOf(fieldValue)), termFreqMap, diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 64ca5f6c2ac..50e1cc9b091 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -209,6 +209,9 @@ Bug Fixes * SOLR-7741: Add missing fields to SolrIndexerConfig.toMap (Mike Drob, Christine Poerschke via Ramkumar Aiyengar) +* SOLR-7143: MoreLikeThis Query parser should handle multiple field names + (Jens Wille, Anshum Gupta) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java index 52f9ffaeb47..a68dd9f196f 100644 --- a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java +++ b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java @@ -20,6 +20,7 @@ import org.apache.lucene.queries.mlt.MoreLikeThis; import org.apache.lucene.search.Query; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; +import org.apache.solr.common.StringUtils; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -38,8 +39,11 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Map; +import java.util.regex.Pattern; public class CloudMLTQParser extends QParser { + // Pattern is thread safe -- TODO? share this with general 'fl' param + private static final Pattern splitList = Pattern.compile(",| "); public CloudMLTQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { @@ -86,14 +90,21 @@ public class CloudMLTQParser extends QParser { String[] qf = localParams.getParams("qf"); Map> filteredDocument = new HashMap(); + ArrayList fieldNames = new ArrayList(); + if (qf != null) { - mlt.setFieldNames(qf); - for (String field : qf) { - filteredDocument.put(field, doc.getFieldValues(field)); + for (String fieldName : qf) { + if (!StringUtils.isEmpty(fieldName)) { + String[] strings = splitList.split(fieldName); + for (String string : strings) { + if (!StringUtils.isEmpty(string)) { + fieldNames.add(string); + } + } + } } } else { Map fields = req.getSchema().getFields(); - ArrayList fieldNames = new ArrayList(); for (String field : doc.getFieldNames()) { // Only use fields that are stored and have an explicit analyzer. // This makes sense as the query uses tf/idf/.. for query construction. @@ -101,10 +112,18 @@ public class CloudMLTQParser extends QParser { if(fields.get(field).stored() && fields.get(field).getType().isExplicitAnalyzer()) { fieldNames.add(field); - filteredDocument.put(field, doc.getFieldValues(field)); } } - mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()])); + } + + if( fieldNames.size() < 1 ) { + throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, + "MoreLikeThis requires at least one similarity field: qf" ); + } + + mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()])); + for (String field : fieldNames) { + filteredDocument.put(field, doc.getFieldValues(field)); } try { diff --git a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java index 75562188e80..0edeb10790a 100644 --- a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java +++ b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java @@ -25,6 +25,7 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.NumericUtils; import org.apache.solr.common.SolrException; +import org.apache.solr.common.StringUtils; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.SchemaField; @@ -35,8 +36,11 @@ import org.apache.solr.search.SolrIndexSearcher; import java.io.IOException; import java.util.ArrayList; import java.util.Map; +import java.util.regex.Pattern; public class SimpleMLTQParser extends QParser { + // Pattern is thread safe -- TODO? share this with general 'fl' param + private static final Pattern splitList = Pattern.compile(",| "); public SimpleMLTQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { @@ -85,18 +89,30 @@ public class SimpleMLTQParser extends QParser { ArrayList fields = new ArrayList(); if (qf != null) { - mlt.setFieldNames(qf); + for (String fieldName : qf) { + if (!StringUtils.isEmpty(fieldName)) { + String[] strings = splitList.split(fieldName); + for (String string : strings) { + if (!StringUtils.isEmpty(string)) { + fields.add(string); + } + } + } + } } else { - Map fieldNames = req.getSearcher().getSchema().getFields(); for (String fieldName : fieldNames.keySet()) { if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored()) if (fieldNames.get(fieldName).getType().getNumericType() == null) fields.add(fieldName); } - mlt.setFieldNames(fields.toArray(new String[fields.size()])); + } + if( fields.size() < 1 ) { + throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, + "MoreLikeThis requires at least one similarity field: qf" ); } + mlt.setFieldNames(fields.toArray(new String[fields.size()])); mlt.setAnalyzer(req.getSchema().getIndexAnalyzer()); return mlt.like(scoreDocs[0].doc); diff --git a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java index 91d3e1db6f8..9c16de35da2 100644 --- a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java @@ -59,6 +59,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { String id = "id"; delQ("*:*"); String FIELD1 = "lowerfilt" ; + String FIELD2 = "lowerfilt1" ; indexDoc(sdoc(id, "1", FIELD1, "toyota")); indexDoc(sdoc(id, "2", FIELD1, "chevrolet")); @@ -87,6 +88,14 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { indexDoc(sdoc(id, "26", FIELD1, "bmw usa 328i")); indexDoc(sdoc(id, "27", FIELD1, "bmw usa 535i")); indexDoc(sdoc(id, "28", FIELD1, "bmw 750Li")); + indexDoc(sdoc(id, "29", FIELD1, "bmw usa", + FIELD2, "red green blue")); + indexDoc(sdoc(id, "30", FIELD1, "The quote red fox jumped over the lazy brown dogs.", + FIELD2, "red green yellow")); + indexDoc(sdoc(id, "31", FIELD1, "The fat red fox jumped over the lazy brown dogs.", + FIELD2, "green blue yellow")); + indexDoc(sdoc(id, "32", FIELD1, "The slim red fox jumped over the lazy brown dogs.", + FIELD2, "yellow white black")); commit(); @@ -100,7 +109,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17"); QueryResponse queryResponse = cloudClient.query(params); SolrDocumentList solrDocuments = queryResponse.getResults(); - int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 9}; + int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 32}; int[] actualIds = new int[10]; int i = 0; for (SolrDocument solrDocument : solrDocuments) { @@ -113,7 +122,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { params.set(CommonParams.DEBUG, "true"); queryResponse = queryServer(params); solrDocuments = queryResponse.getResults(); - expectedIds = new int[]{3, 27, 26, 28}; + expectedIds = new int[]{3, 29, 27, 26, 28}; actualIds = new int[solrDocuments.size()]; i = 0; for (SolrDocument solrDocument : solrDocuments) { @@ -139,6 +148,36 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { actualParsedQueries.get(counter))); } + params = new ModifiableSolrParams(); + params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1 mindf=0 mintf=1}26"); + params.set(CommonParams.DEBUG, "true"); + queryResponse = queryServer(params); + solrDocuments = queryResponse.getResults(); + expectedIds = new int[]{26, 27, 3, 29, 28}; + actualIds = new int[solrDocuments.size()]; + i = 0; + for (SolrDocument solrDocument : solrDocuments) { + actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id"))); + } + + assertArrayEquals(expectedIds, actualIds); + + expectedQueryString = "lowerfilt:bmw lowerfilt:usa lowerfilt:328i"; + + if(queryResponse.getDebugMap().get("parsedquery") instanceof String) { + actualParsedQueries = new ArrayList(); + actualParsedQueries.add((String) queryResponse.getDebugMap().get("parsedquery")); + } else { + actualParsedQueries = (ArrayList) queryResponse + .getDebugMap().get("parsedquery"); + } + + for (int counter = 0; counter < actualParsedQueries.size(); counter++) { + assertTrue("Parsed queries aren't equal", + compareParsedQueryStrings(expectedQueryString, + actualParsedQueries.get(counter))); + } + params = new ModifiableSolrParams(); // Test out a high value of df and make sure nothing matches. params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=20 mintf=1}3"); @@ -161,7 +200,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { params.set(CommonParams.DEBUG, "true"); queryResponse = queryServer(params); solrDocuments = queryResponse.getResults(); - assertEquals("Expected to match 4 documents with a minwl of 3 but found more", solrDocuments.size(), 4); + assertEquals("Expected to match 4 documents with a minwl of 3 but found more", 5, solrDocuments.size()); // Assert that {!mlt}id does not throw an exception i.e. implicitly, only fields that are stored + have explicit // analyzer are used for MLT Query construction. @@ -171,10 +210,12 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { queryResponse = queryServer(params); solrDocuments = queryResponse.getResults(); actualIds = new int[solrDocuments.size()]; - expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 18, 19, 21}; + expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 32, 18, 19}; i = 0; + StringBuilder sb = new StringBuilder(); for (SolrDocument solrDocument : solrDocuments) { actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id"))); + sb.append(actualIds[i-1]).append(", "); } assertArrayEquals(expectedIds, actualIds); } diff --git a/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java index 59a136316d4..6ef72c78bce 100644 --- a/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java @@ -17,9 +17,13 @@ package org.apache.solr.search.mlt; * limitations under the License. */ +import java.util.ArrayList; + import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrDocument; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.response.SolrQueryResponse; import org.junit.BeforeClass; import org.junit.Test; @@ -34,38 +38,50 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 { @Test public void doTest() throws Exception { String id = "id"; + String FIELD1 = "lowerfilt" ; + String FIELD2 = "lowerfilt1" ; delQ("*:*"); - assertU(adoc(id, "1", "lowerfilt", "toyota")); - assertU(adoc(id, "2", "lowerfilt", "chevrolet")); - assertU(adoc(id, "3", "lowerfilt", "suzuki")); - assertU(adoc(id, "4", "lowerfilt", "ford")); - assertU(adoc(id, "5", "lowerfilt", "ferrari")); - assertU(adoc(id, "6", "lowerfilt", "jaguar")); - assertU(adoc(id, "7", "lowerfilt", "mclaren moon or the moon and moon moon shine " + + assertU(adoc(id, "1", FIELD1, "toyota")); + assertU(adoc(id, "2", FIELD1, "chevrolet")); + assertU(adoc(id, "3", FIELD1, "suzuki")); + assertU(adoc(id, "4", FIELD1, "ford")); + assertU(adoc(id, "5", FIELD1, "ferrari")); + assertU(adoc(id, "6", FIELD1, "jaguar")); + assertU(adoc(id, "7", FIELD1, "mclaren moon or the moon and moon moon shine " + "and the moon but moon was good foxes too")); - assertU(adoc(id, "8", "lowerfilt", "sonata")); - assertU(adoc(id, "9", "lowerfilt", "The quick red fox jumped over the lazy big " + + assertU(adoc(id, "8", FIELD1, "sonata")); + assertU(adoc(id, "9", FIELD1, "The quick red fox jumped over the lazy big " + "and large brown dogs.")); - assertU(adoc(id, "10", "lowerfilt", "blue")); - assertU(adoc(id, "12", "lowerfilt", "glue")); - assertU(adoc(id, "13", "lowerfilt", "The quote red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "14", "lowerfilt", "The quote red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "15", "lowerfilt", "The fat red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "16", "lowerfilt", "The slim red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "17", "lowerfilt", "The quote red fox jumped moon over the lazy " + + assertU(adoc(id, "10", FIELD1, "blue")); + assertU(adoc(id, "12", FIELD1, "glue")); + assertU(adoc(id, "13", FIELD1, "The quote red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "14", FIELD1, "The quote red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "15", FIELD1, "The fat red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "16", FIELD1, "The slim red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "17", FIELD1, "The quote red fox jumped moon over the lazy " + "brown dogs moon. Of course moon. Foxes and moon come back to the foxes and moon")); - assertU(adoc(id, "18", "lowerfilt", "The quote red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "19", "lowerfilt", "The hose red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "20", "lowerfilt", "The quote red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "21", "lowerfilt", "The court red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "22", "lowerfilt", "The quote red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "23", "lowerfilt", "The quote red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "24", "lowerfilt", "The file red fox jumped over the lazy brown dogs.")); - assertU(adoc(id, "25", "lowerfilt", "rod fix")); + assertU(adoc(id, "18", FIELD1, "The quote red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "19", FIELD1, "The hose red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "20", FIELD1, "The quote red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "21", FIELD1, "The court red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "22", FIELD1, "The quote red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "23", FIELD1, "The quote red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "24", FIELD1, "The file red fox jumped over the lazy brown dogs.")); + assertU(adoc(id, "25", FIELD1, "rod fix")); + assertU(adoc(id, "26", FIELD1, "bmw usa 328i")); + assertU(adoc(id, "27", FIELD1, "bmw usa 535i")); + assertU(adoc(id, "28", FIELD1, "bmw 750Li")); + assertU(adoc(id, "29", FIELD1, "bmw usa", + FIELD2, "red green blue")); + assertU(adoc(id, "30", FIELD1, "The quote red fox jumped over the lazy brown dogs.", + FIELD2, "red green yellow")); + assertU(adoc(id, "31", FIELD1, "The fat red fox jumped over the lazy brown dogs.", + FIELD2, "green blue yellow")); + assertU(adoc(id, "32", FIELD1, "The slim red fox jumped over the lazy brown dogs.", + FIELD2, "yellow white black")); assertU(commit()); - ModifiableSolrParams params = new ModifiableSolrParams(); params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17"); assertQ(req(params), @@ -79,7 +95,38 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 { "//result/doc[8]/int[@name='id'][.='21']", "//result/doc[9]/int[@name='id'][.='22']", "//result/doc[10]/int[@name='id'][.='23']" - ); + ); + + params = new ModifiableSolrParams(); + params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}26"); + params.set(CommonParams.DEBUG, "true"); + assertQ(req(params), + "//result/doc[1]/int[@name='id'][.='26']", + "//result/doc[2]/int[@name='id'][.='29']", + "//result/doc[3]/int[@name='id'][.='27']", + "//result/doc[4]/int[@name='id'][.='28']" + ); + + params = new ModifiableSolrParams(); + params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=10 mintf=1}26"); + params.set(CommonParams.DEBUG, "true"); + assertQ(req(params), + "//result[@numFound='0']" + ); + + params = new ModifiableSolrParams(); + params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1 mindf=1}26"); + params.set(CommonParams.DEBUG, "true"); + assertQ(req(params), + "//result[@numFound='4']" + ); + + params = new ModifiableSolrParams(); + params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=4 mintf=1 mindf=1}26"); + params.set(CommonParams.DEBUG, "true"); + assertQ(req(params), + "//result[@numFound='1']" + ); } }