SOLR-7639: MoreLikeThis QParser now supports all options provided by the MLT Handler i.e. mintf, mindf, minwl, maxwl, maxqt, and maxntp. This commit also fixes an NPE issue in CloudMLTQParser

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1686123 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Anshum Gupta 2015-06-17 23:12:15 +00:00
parent 929cb9544d
commit 2b96b25046
5 changed files with 73 additions and 39 deletions

View File

@ -130,6 +130,9 @@ New Features
Also, the number of stored (failed and successful) responses are now restricted to 10,000 each as a safety net.
(Anshum Gupta)
* SOLR-7639: MoreLikeThis QParser now supports all options provided by the MLT Handler i.e. mintf, mindf,
minwl, maxwl, maxqt, and maxntp.
Bug Fixes
----------------------

View File

@ -53,22 +53,30 @@ public class CloudMLTQParser extends QParser {
// Do a Real Time Get for the document
SolrDocument doc = getDocument(id);
if(doc == null) {
new SolrException(
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " +
"document with id [" + id + "]");
}
MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
// TODO: Are the mintf and mindf defaults ok at 1/0 ?
mlt.setMinTermFreq(localParams.getInt("mintf", 1));
if(localParams.getInt("mintf") != null)
mlt.setMinTermFreq(localParams.getInt("mintf"));
mlt.setMinDocFreq(localParams.getInt("mindf", 0));
if(localParams.get("minwl") != null)
mlt.setMinWordLen(localParams.getInt("minwl"));
if(localParams.get("maxwl") != null)
mlt.setMaxWordLen(localParams.getInt("maxwl"));
if(localParams.get("maxqt") != null)
mlt.setMaxWordLen(localParams.getInt("maxqt"));
if(localParams.get("maxntp") != null)
mlt.setMaxWordLen(localParams.getInt("maxntp"));
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
String[] qf = localParams.getParams("qf");

View File

@ -59,15 +59,25 @@ public class SimpleMLTQParser extends QParser {
"document with id [" + uniqueValue + "]");
ScoreDoc[] scoreDocs = td.scoreDocs;
MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
// TODO: Are the mintf and mindf defaults ok at '1' ?
mlt.setMinTermFreq(localParams.getInt("mintf", 1));
mlt.setMinDocFreq(localParams.getInt("mindf", 1));
if(localParams.getInt("mintf") != null)
mlt.setMinTermFreq(localParams.getInt("mintf"));
if(localParams.getInt("mindf") != null)
mlt.setMinDocFreq(localParams.getInt("mindf"));
if(localParams.get("minwl") != null)
mlt.setMinWordLen(localParams.getInt("minwl"));
if(localParams.get("maxwl") != null)
mlt.setMaxWordLen(localParams.getInt("maxwl"));
if(localParams.get("maxqt") != null)
mlt.setMaxWordLen(localParams.getInt("maxqt"));
if(localParams.get("maxntp") != null)
mlt.setMaxWordLen(localParams.getInt("maxntp"));
ArrayList<String> fields = new ArrayList();
if (qf != null) {

View File

@ -22,6 +22,7 @@ import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.AbstractFullDistribZkTestBase;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.junit.Test;
@ -99,7 +100,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17");
QueryResponse queryResponse = cloudClient.query(params);
SolrDocumentList solrDocuments = queryResponse.getResults();
int[] expectedIds = new int[]{17, 13, 14, 20, 22, 15, 16, 24, 18, 23};
int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 9};
int[] actualIds = new int[10];
int i = 0;
for (SolrDocument solrDocument : solrDocuments) {
@ -108,32 +109,19 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
assertArrayEquals(expectedIds, actualIds);
params = new ModifiableSolrParams();
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}3");
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}3");
params.set(CommonParams.DEBUG, "true");
queryResponse = queryServer(params);
solrDocuments = queryResponse.getResults();
expectedIds = new int[]{3, 27, 26, 28};
actualIds = new int[4];
actualIds = new int[solrDocuments.size()];
i = 0;
for (SolrDocument solrDocument : solrDocuments) {
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
}
assertArrayEquals(expectedIds, actualIds);
params = new ModifiableSolrParams();
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}20");
params.set("debug" , "query");
queryResponse = queryServer(params);
solrDocuments = queryResponse.getResults();
expectedIds = new int[]{18, 23, 13, 14, 20, 22, 19, 21, 15, 16};
actualIds = new int[10];
i = 0;
for (SolrDocument solrDocument : solrDocuments) {
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
}
assertArrayEquals(expectedIds, actualIds);
String expectedQueryString = "lowerfilt:over lowerfilt:fox lowerfilt:lazy lowerfilt:brown "
+ "lowerfilt:jumped lowerfilt:red lowerfilt:dogs. lowerfilt:quote lowerfilt:the";
String expectedQueryString = "lowerfilt:bmw lowerfilt:usa";
ArrayList<String> actualParsedQueries = (ArrayList<String>) queryResponse
.getDebugMap().get("parsedquery");
@ -143,7 +131,31 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
compareParsedQueryStrings(expectedQueryString,
actualParsedQueries.get(counter)));
}
params = new ModifiableSolrParams();
// Test out a high value of df and make sure nothing matches.
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=20 mintf=1}3");
params.set(CommonParams.DEBUG, "true");
queryResponse = queryServer(params);
solrDocuments = queryResponse.getResults();
assertEquals("Expected to match 0 documents with a mindf of 20 but found more", solrDocuments.size(), 0);
params = new ModifiableSolrParams();
// Test out a high value of wl and make sure nothing matches.
params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=4 mintf=1}3");
params.set(CommonParams.DEBUG, "true");
queryResponse = queryServer(params);
solrDocuments = queryResponse.getResults();
assertEquals("Expected to match 0 documents with a minwl of 4 but found more", solrDocuments.size(), 0);
params = new ModifiableSolrParams();
// Test out a low enough value of minwl and make sure we get the expected matches.
params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1}3");
params.set(CommonParams.DEBUG, "true");
queryResponse = queryServer(params);
solrDocuments = queryResponse.getResults();
assertEquals("Expected to match 4 documents with a minwl of 3 but found more", solrDocuments.size(), 4);
// Assert that {!mlt}id does not throw an exception i.e. implicitly, only fields that are stored + have explicit
// analyzer are used for MLT Query construction.
params = new ModifiableSolrParams();
@ -151,7 +163,8 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
queryResponse = queryServer(params);
solrDocuments = queryResponse.getResults();
expectedIds = new int[]{18, 23, 13, 14, 20, 22, 19, 21, 15, 16};
actualIds = new int[solrDocuments.size()];
expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 18, 19, 21};
i = 0;
for (SolrDocument solrDocument : solrDocuments) {
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
@ -159,10 +172,10 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
assertArrayEquals(expectedIds, actualIds);
}
@Test
@Test(expected=SolrException.class)
public void testInvalidDocument() throws IOException {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}nonexistentdocid");
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}999999");
try {
cloudClient.query(params);
fail("The above query is supposed to throw an exception.");

View File

@ -69,16 +69,16 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17");
assertQ(req(params),
"//result/doc[1]/int[@name='id'][.='17']",
"//result/doc[2]/int[@name='id'][.='13']",
"//result/doc[3]/int[@name='id'][.='14']",
"//result/doc[4]/int[@name='id'][.='18']",
"//result/doc[5]/int[@name='id'][.='20']",
"//result/doc[6]/int[@name='id'][.='22']",
"//result/doc[7]/int[@name='id'][.='23']",
"//result/doc[8]/int[@name='id'][.='9']",
"//result/doc[9]/int[@name='id'][.='7']",
"//result/doc[10]/int[@name='id'][.='15']"
"//result/doc[1]/int[@name='id'][.='13']",
"//result/doc[2]/int[@name='id'][.='14']",
"//result/doc[3]/int[@name='id'][.='15']",
"//result/doc[4]/int[@name='id'][.='16']",
"//result/doc[5]/int[@name='id'][.='18']",
"//result/doc[6]/int[@name='id'][.='19']",
"//result/doc[7]/int[@name='id'][.='20']",
"//result/doc[8]/int[@name='id'][.='21']",
"//result/doc[9]/int[@name='id'][.='22']",
"//result/doc[10]/int[@name='id'][.='23']"
);
}