diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 5765f20316e..a0faec3189c 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -130,6 +130,9 @@ New Features Also, the number of stored (failed and successful) responses are now restricted to 10,000 each as a safety net. (Anshum Gupta) +* SOLR-7639: MoreLikeThis QParser now supports all options provided by the MLT Handler i.e. mintf, mindf, + minwl, maxwl, maxqt, and maxntp. + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java index 5f2c300f6a5..b853db2d43b 100644 --- a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java +++ b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java @@ -53,22 +53,30 @@ public class CloudMLTQParser extends QParser { // Do a Real Time Get for the document SolrDocument doc = getDocument(id); if(doc == null) { - new SolrException( + throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + id + "]"); } MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader()); - // TODO: Are the mintf and mindf defaults ok at 1/0 ? - mlt.setMinTermFreq(localParams.getInt("mintf", 1)); + if(localParams.getInt("mintf") != null) + mlt.setMinTermFreq(localParams.getInt("mintf")); + mlt.setMinDocFreq(localParams.getInt("mindf", 0)); + if(localParams.get("minwl") != null) mlt.setMinWordLen(localParams.getInt("minwl")); - + if(localParams.get("maxwl") != null) mlt.setMaxWordLen(localParams.getInt("maxwl")); + if(localParams.get("maxqt") != null) + mlt.setMaxWordLen(localParams.getInt("maxqt")); + + if(localParams.get("maxntp") != null) + mlt.setMaxWordLen(localParams.getInt("maxntp")); + mlt.setAnalyzer(req.getSchema().getIndexAnalyzer()); String[] qf = localParams.getParams("qf"); diff --git a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java index 3289d36a879..2106b87dcf0 100644 --- a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java +++ b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java @@ -59,15 +59,25 @@ public class SimpleMLTQParser extends QParser { "document with id [" + uniqueValue + "]"); ScoreDoc[] scoreDocs = td.scoreDocs; MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader()); - // TODO: Are the mintf and mindf defaults ok at '1' ? - mlt.setMinTermFreq(localParams.getInt("mintf", 1)); - mlt.setMinDocFreq(localParams.getInt("mindf", 1)); + + if(localParams.getInt("mintf") != null) + mlt.setMinTermFreq(localParams.getInt("mintf")); + + if(localParams.getInt("mindf") != null) + mlt.setMinDocFreq(localParams.getInt("mindf")); + if(localParams.get("minwl") != null) mlt.setMinWordLen(localParams.getInt("minwl")); if(localParams.get("maxwl") != null) mlt.setMaxWordLen(localParams.getInt("maxwl")); + if(localParams.get("maxqt") != null) + mlt.setMaxWordLen(localParams.getInt("maxqt")); + + if(localParams.get("maxntp") != null) + mlt.setMaxWordLen(localParams.getInt("maxntp")); + ArrayList fields = new ArrayList(); if (qf != null) { diff --git a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java index c0b0eb08203..0780894ac61 100644 --- a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java @@ -22,6 +22,7 @@ import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.cloud.AbstractFullDistribZkTestBase; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.junit.Test; @@ -99,7 +100,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17"); QueryResponse queryResponse = cloudClient.query(params); SolrDocumentList solrDocuments = queryResponse.getResults(); - int[] expectedIds = new int[]{17, 13, 14, 20, 22, 15, 16, 24, 18, 23}; + int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 9}; int[] actualIds = new int[10]; int i = 0; for (SolrDocument solrDocument : solrDocuments) { @@ -108,32 +109,19 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { assertArrayEquals(expectedIds, actualIds); params = new ModifiableSolrParams(); - params.set(CommonParams.Q, "{!mlt qf=lowerfilt}3"); + params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}3"); + params.set(CommonParams.DEBUG, "true"); queryResponse = queryServer(params); solrDocuments = queryResponse.getResults(); expectedIds = new int[]{3, 27, 26, 28}; - actualIds = new int[4]; + actualIds = new int[solrDocuments.size()]; i = 0; for (SolrDocument solrDocument : solrDocuments) { actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id"))); } assertArrayEquals(expectedIds, actualIds); - params = new ModifiableSolrParams(); - params.set(CommonParams.Q, "{!mlt qf=lowerfilt}20"); - params.set("debug" , "query"); - queryResponse = queryServer(params); - solrDocuments = queryResponse.getResults(); - expectedIds = new int[]{18, 23, 13, 14, 20, 22, 19, 21, 15, 16}; - actualIds = new int[10]; - i = 0; - for (SolrDocument solrDocument : solrDocuments) { - actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id"))); - } - assertArrayEquals(expectedIds, actualIds); - - String expectedQueryString = "lowerfilt:over lowerfilt:fox lowerfilt:lazy lowerfilt:brown " - + "lowerfilt:jumped lowerfilt:red lowerfilt:dogs. lowerfilt:quote lowerfilt:the"; + String expectedQueryString = "lowerfilt:bmw lowerfilt:usa"; ArrayList actualParsedQueries = (ArrayList) queryResponse .getDebugMap().get("parsedquery"); @@ -143,7 +131,31 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { compareParsedQueryStrings(expectedQueryString, actualParsedQueries.get(counter))); } - + + params = new ModifiableSolrParams(); + // Test out a high value of df and make sure nothing matches. + params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=20 mintf=1}3"); + params.set(CommonParams.DEBUG, "true"); + queryResponse = queryServer(params); + solrDocuments = queryResponse.getResults(); + assertEquals("Expected to match 0 documents with a mindf of 20 but found more", solrDocuments.size(), 0); + + params = new ModifiableSolrParams(); + // Test out a high value of wl and make sure nothing matches. + params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=4 mintf=1}3"); + params.set(CommonParams.DEBUG, "true"); + queryResponse = queryServer(params); + solrDocuments = queryResponse.getResults(); + assertEquals("Expected to match 0 documents with a minwl of 4 but found more", solrDocuments.size(), 0); + + params = new ModifiableSolrParams(); + // Test out a low enough value of minwl and make sure we get the expected matches. + params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1}3"); + params.set(CommonParams.DEBUG, "true"); + queryResponse = queryServer(params); + solrDocuments = queryResponse.getResults(); + assertEquals("Expected to match 4 documents with a minwl of 3 but found more", solrDocuments.size(), 4); + // Assert that {!mlt}id does not throw an exception i.e. implicitly, only fields that are stored + have explicit // analyzer are used for MLT Query construction. params = new ModifiableSolrParams(); @@ -151,7 +163,8 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { queryResponse = queryServer(params); solrDocuments = queryResponse.getResults(); - expectedIds = new int[]{18, 23, 13, 14, 20, 22, 19, 21, 15, 16}; + actualIds = new int[solrDocuments.size()]; + expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 18, 19, 21}; i = 0; for (SolrDocument solrDocument : solrDocuments) { actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id"))); @@ -159,10 +172,10 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase { assertArrayEquals(expectedIds, actualIds); } - @Test + @Test(expected=SolrException.class) public void testInvalidDocument() throws IOException { ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(CommonParams.Q, "{!mlt qf=lowerfilt}nonexistentdocid"); + params.set(CommonParams.Q, "{!mlt qf=lowerfilt}999999"); try { cloudClient.query(params); fail("The above query is supposed to throw an exception."); diff --git a/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java index 744afc47955..59a136316d4 100644 --- a/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java @@ -69,16 +69,16 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 { ModifiableSolrParams params = new ModifiableSolrParams(); params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17"); assertQ(req(params), - "//result/doc[1]/int[@name='id'][.='17']", - "//result/doc[2]/int[@name='id'][.='13']", - "//result/doc[3]/int[@name='id'][.='14']", - "//result/doc[4]/int[@name='id'][.='18']", - "//result/doc[5]/int[@name='id'][.='20']", - "//result/doc[6]/int[@name='id'][.='22']", - "//result/doc[7]/int[@name='id'][.='23']", - "//result/doc[8]/int[@name='id'][.='9']", - "//result/doc[9]/int[@name='id'][.='7']", - "//result/doc[10]/int[@name='id'][.='15']" + "//result/doc[1]/int[@name='id'][.='13']", + "//result/doc[2]/int[@name='id'][.='14']", + "//result/doc[3]/int[@name='id'][.='15']", + "//result/doc[4]/int[@name='id'][.='16']", + "//result/doc[5]/int[@name='id'][.='18']", + "//result/doc[6]/int[@name='id'][.='19']", + "//result/doc[7]/int[@name='id'][.='20']", + "//result/doc[8]/int[@name='id'][.='21']", + "//result/doc[9]/int[@name='id'][.='22']", + "//result/doc[10]/int[@name='id'][.='23']" ); }