mirror of https://github.com/apache/lucene.git
SOLR-5351: Fixed More Like This Handler to use all fields provided in mlt.fl when used with content stream. The similarity is calculated between the content stream's value and all fields listed in mlt.fl.
This commit is contained in:
parent
0a1de2c4a5
commit
41ecad9897
|
@ -131,6 +131,10 @@ New Features
|
|||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
* SOLR-5351: Fixed More Like This Handler to use all fields provided in mlt.fl when used with
|
||||
content stream. The similarity is calculated between the content stream's value and all
|
||||
fields listed in mlt.fl. (Dawid Weiss)
|
||||
|
||||
* SOLR-12103: Raise CryptoKeys.DEFAULT_KEYPAIR_LENGTH from 1024 to 2048. (Mark Miller)
|
||||
|
||||
* SOLR-12107: Fixed an error in [child] transformer that could occur if documentCache was not used (hossman)
|
||||
|
@ -425,6 +429,7 @@ Upgrade Notes
|
|||
|
||||
New Features
|
||||
----------------------
|
||||
|
||||
* SOLR-11285: Simulation framework for autoscaling. (ab)
|
||||
|
||||
* LUCENE-2899: In the Solr analysis-extras contrib, added support for the
|
||||
|
|
|
@ -21,6 +21,8 @@ import java.io.Reader;
|
|||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -36,6 +38,7 @@ import org.apache.lucene.search.BooleanQuery;
|
|||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.StringUtils;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
|
@ -80,7 +83,13 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
|||
private static final Pattern splitList = Pattern.compile(",| ");
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
|
||||
static final String ERR_MSG_QUERY_OR_TEXT_REQUIRED =
|
||||
"MoreLikeThis requires either a query (?q=) or text to find similar documents.";
|
||||
|
||||
static final String ERR_MSG_SINGLE_STREAM_ONLY =
|
||||
"MoreLikeThis does not support multiple ContentStreams";
|
||||
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
super.init(args);
|
||||
|
@ -156,7 +165,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
|||
}
|
||||
if (iter.hasNext()) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"MoreLikeThis does not support multiple ContentStreams");
|
||||
ERR_MSG_SINGLE_STREAM_ONLY);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -191,7 +200,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
|||
}
|
||||
} else {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"MoreLikeThis requires either a query (?q=) or text to find similar documents.");
|
||||
ERR_MSG_QUERY_OR_TEXT_REQUIRED);
|
||||
}
|
||||
|
||||
} finally {
|
||||
|
@ -411,10 +420,31 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
|||
|
||||
public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
|
||||
{
|
||||
// analyzing with the first field: previous (stupid) behavior
|
||||
rawMLTQuery = mlt.like(mlt.getFieldNames()[0], reader);
|
||||
// SOLR-5351: if we only check against a single field, use the reader directly. Otherwise we
|
||||
// repeat the stream's content for multiple fields so that query terms can be pulled from any
|
||||
// of those fields.
|
||||
String [] fields = mlt.getFieldNames();
|
||||
if (fields.length == 1) {
|
||||
rawMLTQuery = mlt.like(fields[0], reader);
|
||||
} else {
|
||||
CharsRefBuilder buffered = new CharsRefBuilder();
|
||||
char [] chunk = new char [1024];
|
||||
int len;
|
||||
while ((len = reader.read(chunk)) >= 0) {
|
||||
buffered.append(chunk, 0, len);
|
||||
}
|
||||
|
||||
Collection<Object> streamValue = Collections.singleton(buffered.get().toString());
|
||||
Map<String, Collection<Object>> multifieldDoc = new HashMap<>(fields.length);
|
||||
for (String field : fields) {
|
||||
multifieldDoc.put(field, streamValue);
|
||||
}
|
||||
|
||||
rawMLTQuery = mlt.like(multifieldDoc);
|
||||
}
|
||||
|
||||
boostedMLTQuery = getBoostedQuery( rawMLTQuery );
|
||||
if( terms != null ) {
|
||||
if (terms != null) {
|
||||
fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms );
|
||||
}
|
||||
DocListAndSet results = new DocListAndSet();
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.handler;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.*;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
|
@ -29,12 +30,10 @@ import org.apache.solr.response.SolrQueryResponse;
|
|||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
/**
|
||||
* TODO -- this needs to actually test the results/query etc
|
||||
*/
|
||||
public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 {
|
||||
|
||||
@BeforeClass
|
||||
public static void moreLikeThisBeforeClass() throws Exception {
|
||||
initCore("solrconfig.xml", "schema.xml");
|
||||
|
@ -47,27 +46,7 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 {
|
|||
MoreLikeThisHandler mlt = new MoreLikeThisHandler();
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
SolrQueryRequestBase req = new SolrQueryRequestBase( core, params) {};
|
||||
|
||||
// requires 'q' or single content stream
|
||||
try {
|
||||
mlt.handleRequestBody( req, new SolrQueryResponse() );
|
||||
}
|
||||
catch( Exception ex ) {} // expected
|
||||
|
||||
// requires 'q' or single content stream
|
||||
try {
|
||||
ArrayList<ContentStream> streams = new ArrayList<>( 2 );
|
||||
streams.add( new ContentStreamBase.StringStream( "hello" ) );
|
||||
streams.add( new ContentStreamBase.StringStream( "there" ) );
|
||||
req.setContentStreams( streams );
|
||||
mlt.handleRequestBody( req, new SolrQueryResponse() );
|
||||
}
|
||||
catch( Exception ex ) {} // expected
|
||||
finally {
|
||||
req.close();
|
||||
}
|
||||
|
||||
assertU(adoc("id","42","name","Tom Cruise","subword","Top Gun","subword","Risky Business","subword","The Color of Money","subword","Minority Report","subword", "Days of Thunder","subword", "Eyes Wide Shut","subword", "Far and Away", "foo_ti","10"));
|
||||
assertU(adoc("id","43","name","Tom Hanks","subword","The Green Mile","subword","Forest Gump","subword","Philadelphia Story","subword","Big","subword","Cast Away", "foo_ti","10"));
|
||||
assertU(adoc("id","44","name","Harrison Ford","subword","Star Wars","subword","Indiana Jones","subword","Patriot Games","subword","Regarding Henry"));
|
||||
|
@ -75,7 +54,6 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 {
|
|||
assertU(adoc("id","46","name","Nicole Kidman","subword","Batman","subword","Days of Thunder","subword","Eyes Wide Shut","subword","Far and Away"));
|
||||
assertU(commit());
|
||||
|
||||
params.set(CommonParams.Q, "id:42");
|
||||
params.set(MoreLikeThisParams.MLT, "true");
|
||||
params.set(MoreLikeThisParams.SIMILARITY_FIELDS, "name,subword");
|
||||
params.set(MoreLikeThisParams.INTERESTING_TERMS, "details");
|
||||
|
@ -83,67 +61,128 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 {
|
|||
params.set(MoreLikeThisParams.MIN_DOC_FREQ,"1");
|
||||
params.set("indent","true");
|
||||
|
||||
SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params);
|
||||
assertQ("morelikethis - tom cruise",mltreq
|
||||
,"//result/doc[1]/str[@name='id'][.='46']"
|
||||
,"//result/doc[2]/str[@name='id'][.='43']");
|
||||
// requires 'q' or a single content stream
|
||||
SolrException ex = expectThrows(SolrException.class, () -> {
|
||||
try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {}) {
|
||||
mlt.handleRequestBody(req, new SolrQueryResponse());
|
||||
}
|
||||
});
|
||||
assertEquals(ex.getMessage(), MoreLikeThisHandler.ERR_MSG_QUERY_OR_TEXT_REQUIRED);
|
||||
assertEquals(ex.code(), SolrException.ErrorCode.BAD_REQUEST.code);
|
||||
|
||||
// requires a single content stream (more than one is not supported).
|
||||
ex = expectThrows(SolrException.class, () -> {
|
||||
try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {}) {
|
||||
ArrayList<ContentStream> streams = new ArrayList<>(2);
|
||||
streams.add(new ContentStreamBase.StringStream("hello"));
|
||||
streams.add(new ContentStreamBase.StringStream("there"));
|
||||
req.setContentStreams(streams);
|
||||
mlt.handleRequestBody(req, new SolrQueryResponse());
|
||||
}
|
||||
});
|
||||
assertEquals(ex.getMessage(), MoreLikeThisHandler.ERR_MSG_SINGLE_STREAM_ONLY);
|
||||
assertEquals(ex.code(), SolrException.ErrorCode.BAD_REQUEST.code);
|
||||
|
||||
params.set(CommonParams.Q, "id:42");
|
||||
|
||||
try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) {
|
||||
assertQ("morelikethis - tom cruise", mltreq,
|
||||
"//result/doc[1]/str[@name='id'][.='46']",
|
||||
"//result/doc[2]/str[@name='id'][.='43']");
|
||||
}
|
||||
|
||||
params.set(MoreLikeThisParams.BOOST, "true");
|
||||
mltreq.close(); mltreq = new LocalSolrQueryRequest( core, params);
|
||||
assertQ("morelikethis - tom cruise",mltreq
|
||||
,"//result/doc[1]/str[@name='id'][.='46']"
|
||||
,"//result/doc[2]/str[@name='id'][.='43']");
|
||||
|
||||
try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) {
|
||||
assertQ("morelikethis - tom cruise", mltreq,
|
||||
"//result/doc[1]/str[@name='id'][.='46']",
|
||||
"//result/doc[2]/str[@name='id'][.='43']");
|
||||
}
|
||||
|
||||
params.set(CommonParams.Q, "id:44");
|
||||
mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params);
|
||||
assertQ("morelike this - harrison ford",mltreq
|
||||
,"//result/doc[1]/str[@name='id'][.='45']");
|
||||
try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) {
|
||||
assertQ("morelike this - harrison ford", mltreq,
|
||||
"//result/doc[1]/str[@name='id'][.='45']");
|
||||
}
|
||||
|
||||
// test MoreLikeThis debug
|
||||
params.set(CommonParams.DEBUG_QUERY, "true");
|
||||
mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params);
|
||||
assertQ("morelike this - harrison ford",mltreq
|
||||
,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='rawMLTQuery']"
|
||||
,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='boostedMLTQuery']"
|
||||
,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='realMLTQuery']"
|
||||
,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/lst[@name='explain']/str[@name='45']"
|
||||
);
|
||||
try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) {
|
||||
assertQ("morelike this - harrison ford", mltreq,
|
||||
"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='rawMLTQuery']",
|
||||
"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='boostedMLTQuery']",
|
||||
"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='realMLTQuery']",
|
||||
"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/lst[@name='explain']/str[@name='45']"
|
||||
);
|
||||
}
|
||||
|
||||
// test that qparser plugins work
|
||||
params.remove(CommonParams.DEBUG_QUERY);
|
||||
params.set(CommonParams.Q, "{!field f=id}44");
|
||||
mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params);
|
||||
assertQ(mltreq
|
||||
,"//result/doc[1]/str[@name='id'][.='45']");
|
||||
try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) {
|
||||
assertQ(mltreq, "//result/doc[1]/str[@name='id'][.='45']");
|
||||
}
|
||||
|
||||
params.set(CommonParams.Q, "id:42");
|
||||
params.set(MoreLikeThisParams.QF,"name^5.0 subword^0.1");
|
||||
mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params);
|
||||
assertQ("morelikethis with weights",mltreq
|
||||
,"//result/doc[1]/str[@name='id'][.='43']"
|
||||
,"//result/doc[2]/str[@name='id'][.='46']");
|
||||
|
||||
try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) {
|
||||
assertQ("morelikethis with weights", mltreq,
|
||||
"//result/doc[1]/str[@name='id'][.='43']",
|
||||
"//result/doc[2]/str[@name='id'][.='46']");
|
||||
}
|
||||
|
||||
// test that qparser plugins work w/ the MoreLikeThisHandler
|
||||
params.set(CommonParams.QT, "/mlt");
|
||||
params.set(CommonParams.Q, "{!field f=id}44");
|
||||
mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params);
|
||||
assertQ(mltreq
|
||||
,"//result/doc[1]/str[@name='id'][.='45']");
|
||||
try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) {
|
||||
assertQ(mltreq, "//result/doc[1]/str[@name='id'][.='45']");
|
||||
}
|
||||
|
||||
// test that debugging works (test for MoreLikeThis*Handler*)
|
||||
params.set(CommonParams.QT, "/mlt");
|
||||
params.set(CommonParams.DEBUG_QUERY, "true");
|
||||
mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params);
|
||||
assertQ(mltreq
|
||||
,"//result/doc[1]/str[@name='id'][.='45']"
|
||||
,"//lst[@name='debug']/lst[@name='explain']"
|
||||
);
|
||||
try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) {
|
||||
assertQ(mltreq,
|
||||
"//result/doc[1]/str[@name='id'][.='45']",
|
||||
"//lst[@name='debug']/lst[@name='explain']"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// params.put(MoreLikeThisParams.QF,new String[]{"foo_ti"});
|
||||
// String response = h.query(mltreq);
|
||||
// System.out.println(response);
|
||||
@Test
|
||||
public void testMultifieldSimilarity() throws Exception
|
||||
{
|
||||
SolrCore core = h.getCore();
|
||||
MoreLikeThisHandler mlt = new MoreLikeThisHandler();
|
||||
|
||||
mltreq.close();
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
|
||||
assertU(adoc("id", "1", "name", "aaa bbb ccc", "subword", " zzz"));
|
||||
assertU(adoc("id", "2", "name", " bbb ccc", "subword", " bbb zzz"));
|
||||
assertU(adoc("id", "3", "name", " ccc", "subword", "aaa bbb zzz"));
|
||||
assertU(adoc("id", "4", "name", " ccc", "subword", " bbb "));
|
||||
assertU(commit());
|
||||
|
||||
params.set(CommonParams.QT, "/mlt");
|
||||
params.set(MoreLikeThisParams.MLT, "true");
|
||||
params.set(MoreLikeThisParams.SIMILARITY_FIELDS, "name,subword");
|
||||
params.set(MoreLikeThisParams.INTERESTING_TERMS, "details");
|
||||
params.set(MoreLikeThisParams.MIN_TERM_FREQ, "1");
|
||||
params.set(MoreLikeThisParams.MIN_DOC_FREQ, "2");
|
||||
params.set(MoreLikeThisParams.BOOST, true);
|
||||
params.set("indent", "true");
|
||||
|
||||
try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {}) {
|
||||
ArrayList<ContentStream> streams = new ArrayList<>(2);
|
||||
streams.add(new ContentStreamBase.StringStream("bbb", "zzz"));
|
||||
req.setContentStreams(streams);
|
||||
|
||||
// Make sure we have terms from both fields in the interestingTerms array and all documents have been
|
||||
// retrieved as matching.
|
||||
assertQ(req,
|
||||
"//lst[@name = 'interestingTerms']/float[@name = 'subword:bbb']",
|
||||
"//lst[@name = 'interestingTerms']/float[@name = 'name:bbb']",
|
||||
"//result[@name = 'response' and @numFound = '4']");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue