diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 9159e92b6fb..3218ab390d7 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -131,6 +131,10 @@ New Features Bug Fixes ---------------------- +* SOLR-5351: Fixed More Like This Handler to use all fields provided in mlt.fl when used with + content stream. The similarity is calculated between the content stream's value and all + fields listed in mlt.fl. (Dawid Weiss) + * SOLR-12103: Raise CryptoKeys.DEFAULT_KEYPAIR_LENGTH from 1024 to 2048. (Mark Miller) * SOLR-12107: Fixed a error in [child] transformer that could ocur if documentCache was not used (hossman) @@ -425,6 +429,7 @@ Upgrade Notes New Features ---------------------- + * SOLR-11285: Simulation framework for autoscaling. (ab) * LUCENE-2899: In the Solr analysis-extras contrib, added support for the diff --git a/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java b/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java index 62f1016bbaf..cce2939c583 100644 --- a/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java @@ -21,6 +21,8 @@ import java.io.Reader; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -36,6 +38,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.CharsRefBuilder; import org.apache.solr.common.SolrException; import org.apache.solr.common.StringUtils; import org.apache.solr.common.params.CommonParams; @@ -80,7 +83,13 @@ public class MoreLikeThisHandler extends RequestHandlerBase private static final Pattern splitList = Pattern.compile(",| "); private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - + + static final String ERR_MSG_QUERY_OR_TEXT_REQUIRED = + "MoreLikeThis requires either a query (?q=) or text to find similar documents."; + + static final String ERR_MSG_SINGLE_STREAM_ONLY = + "MoreLikeThis does not support multiple ContentStreams"; + @Override public void init(NamedList args) { super.init(args); @@ -156,7 +165,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase } if (iter.hasNext()) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, - "MoreLikeThis does not support multiple ContentStreams"); + ERR_MSG_SINGLE_STREAM_ONLY); } } } @@ -191,7 +200,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase } } else { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, - "MoreLikeThis requires either a query (?q=) or text to find similar documents."); + ERR_MSG_QUERY_OR_TEXT_REQUIRED); } } finally { @@ -411,10 +420,31 @@ public class MoreLikeThisHandler extends RequestHandlerBase public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List filters, List terms, int flags ) throws IOException { - // analyzing with the first field: previous (stupid) behavior - rawMLTQuery = mlt.like(mlt.getFieldNames()[0], reader); + // SOLR-5351: if we only check against a single field, use the reader directly. Otherwise we + // repeat the stream's content for multiple fields so that query terms can be pulled from any + // of those fields.
+ String [] fields = mlt.getFieldNames(); + if (fields.length == 1) { + rawMLTQuery = mlt.like(fields[0], reader); + } else { + CharsRefBuilder buffered = new CharsRefBuilder(); + char [] chunk = new char [1024]; + int len; + while ((len = reader.read(chunk)) >= 0) { + buffered.append(chunk, 0, len); + } + + Collection<Object> streamValue = Collections.singleton(buffered.get().toString()); + Map<String, Collection<Object>> multifieldDoc = new HashMap<>(fields.length); + for (String field : fields) { + multifieldDoc.put(field, streamValue); + } + + rawMLTQuery = mlt.like(multifieldDoc); + } + boostedMLTQuery = getBoostedQuery( rawMLTQuery ); - if( terms != null ) { + if (terms != null) { fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms ); } DocListAndSet results = new DocListAndSet(); diff --git a/solr/core/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java index aa63ce32576..6b80014c0fb 100644 --- a/solr/core/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java @@ -18,6 +18,7 @@ package org.apache.solr.handler; import java.util.ArrayList; import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.*; import org.apache.solr.common.util.ContentStream; import org.apache.solr.common.util.ContentStreamBase; @@ -29,12 +30,10 @@ import org.apache.solr.response.SolrQueryResponse; import org.junit.BeforeClass; import org.junit.Test; - /** * TODO -- this needs to actually test the results/query etc */ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 { - @BeforeClass public static void moreLikeThisBeforeClass() throws Exception { initCore("solrconfig.xml", "schema.xml"); @@ -47,27 +46,7 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 { MoreLikeThisHandler mlt = new MoreLikeThisHandler(); ModifiableSolrParams params = new ModifiableSolrParams(); - 
SolrQueryRequestBase req = new SolrQueryRequestBase( core, params) {}; - - // requires 'q' or single content stream - try { - mlt.handleRequestBody( req, new SolrQueryResponse() ); - } - catch( Exception ex ) {} // expected - // requires 'q' or single content stream - try { - ArrayList streams = new ArrayList<>( 2 ); - streams.add( new ContentStreamBase.StringStream( "hello" ) ); - streams.add( new ContentStreamBase.StringStream( "there" ) ); - req.setContentStreams( streams ); - mlt.handleRequestBody( req, new SolrQueryResponse() ); - } - catch( Exception ex ) {} // expected - finally { - req.close(); - } - assertU(adoc("id","42","name","Tom Cruise","subword","Top Gun","subword","Risky Business","subword","The Color of Money","subword","Minority Report","subword", "Days of Thunder","subword", "Eyes Wide Shut","subword", "Far and Away", "foo_ti","10")); assertU(adoc("id","43","name","Tom Hanks","subword","The Green Mile","subword","Forest Gump","subword","Philadelphia Story","subword","Big","subword","Cast Away", "foo_ti","10")); assertU(adoc("id","44","name","Harrison Ford","subword","Star Wars","subword","Indiana Jones","subword","Patriot Games","subword","Regarding Henry")); @@ -75,7 +54,6 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 { assertU(adoc("id","46","name","Nicole Kidman","subword","Batman","subword","Days of Thunder","subword","Eyes Wide Shut","subword","Far and Away")); assertU(commit()); - params.set(CommonParams.Q, "id:42"); params.set(MoreLikeThisParams.MLT, "true"); params.set(MoreLikeThisParams.SIMILARITY_FIELDS, "name,subword"); params.set(MoreLikeThisParams.INTERESTING_TERMS, "details"); @@ -83,67 +61,128 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 { params.set(MoreLikeThisParams.MIN_DOC_FREQ,"1"); params.set("indent","true"); - SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params); - assertQ("morelikethis - tom cruise",mltreq - ,"//result/doc[1]/str[@name='id'][.='46']" - 
,"//result/doc[2]/str[@name='id'][.='43']"); + // requires 'q' or a single content stream + SolrException ex = expectThrows(SolrException.class, () -> { + try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {}) { + mlt.handleRequestBody(req, new SolrQueryResponse()); + } + }); + assertEquals(ex.getMessage(), MoreLikeThisHandler.ERR_MSG_QUERY_OR_TEXT_REQUIRED); + assertEquals(ex.code(), SolrException.ErrorCode.BAD_REQUEST.code); + + // requires a single content stream (more than one is not supported). + ex = expectThrows(SolrException.class, () -> { + try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {}) { + ArrayList streams = new ArrayList<>(2); + streams.add(new ContentStreamBase.StringStream("hello")); + streams.add(new ContentStreamBase.StringStream("there")); + req.setContentStreams(streams); + mlt.handleRequestBody(req, new SolrQueryResponse()); + } + }); + assertEquals(ex.getMessage(), MoreLikeThisHandler.ERR_MSG_SINGLE_STREAM_ONLY); + assertEquals(ex.code(), SolrException.ErrorCode.BAD_REQUEST.code); + + params.set(CommonParams.Q, "id:42"); + + try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) { + assertQ("morelikethis - tom cruise", mltreq, + "//result/doc[1]/str[@name='id'][.='46']", + "//result/doc[2]/str[@name='id'][.='43']"); + } params.set(MoreLikeThisParams.BOOST, "true"); - mltreq.close(); mltreq = new LocalSolrQueryRequest( core, params); - assertQ("morelikethis - tom cruise",mltreq - ,"//result/doc[1]/str[@name='id'][.='46']" - ,"//result/doc[2]/str[@name='id'][.='43']"); + + try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) { + assertQ("morelikethis - tom cruise", mltreq, + "//result/doc[1]/str[@name='id'][.='46']", + "//result/doc[2]/str[@name='id'][.='43']"); + } params.set(CommonParams.Q, "id:44"); - mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params); - assertQ("morelike this - harrison ford",mltreq - 
,"//result/doc[1]/str[@name='id'][.='45']"); + try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) { + assertQ("morelike this - harrison ford", mltreq, + "//result/doc[1]/str[@name='id'][.='45']"); + } // test MoreLikeThis debug params.set(CommonParams.DEBUG_QUERY, "true"); - mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params); - assertQ("morelike this - harrison ford",mltreq - ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='rawMLTQuery']" - ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='boostedMLTQuery']" - ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='realMLTQuery']" - ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/lst[@name='explain']/str[@name='45']" - ); + try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) { + assertQ("morelike this - harrison ford", mltreq, + "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='rawMLTQuery']", + "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='boostedMLTQuery']", + "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='realMLTQuery']", + "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/lst[@name='explain']/str[@name='45']" + ); + } // test that qparser plugins work params.remove(CommonParams.DEBUG_QUERY); params.set(CommonParams.Q, "{!field f=id}44"); - mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params); - assertQ(mltreq - ,"//result/doc[1]/str[@name='id'][.='45']"); + try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) { + assertQ(mltreq, "//result/doc[1]/str[@name='id'][.='45']"); + } params.set(CommonParams.Q, "id:42"); params.set(MoreLikeThisParams.QF,"name^5.0 subword^0.1"); - mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params); - assertQ("morelikethis with weights",mltreq - 
,"//result/doc[1]/str[@name='id'][.='43']" - ,"//result/doc[2]/str[@name='id'][.='46']"); - + try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) { + assertQ("morelikethis with weights", mltreq, + "//result/doc[1]/str[@name='id'][.='43']", + "//result/doc[2]/str[@name='id'][.='46']"); + } // test that qparser plugins work w/ the MoreLikeThisHandler params.set(CommonParams.QT, "/mlt"); params.set(CommonParams.Q, "{!field f=id}44"); - mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params); - assertQ(mltreq - ,"//result/doc[1]/str[@name='id'][.='45']"); + try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) { + assertQ(mltreq, "//result/doc[1]/str[@name='id'][.='45']"); + } // test that debugging works (test for MoreLikeThis*Handler*) params.set(CommonParams.QT, "/mlt"); params.set(CommonParams.DEBUG_QUERY, "true"); - mltreq.close(); mltreq = new LocalSolrQueryRequest(h.getCore(), params); - assertQ(mltreq - ,"//result/doc[1]/str[@name='id'][.='45']" - ,"//lst[@name='debug']/lst[@name='explain']" - ); + try (SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, params)) { + assertQ(mltreq, + "//result/doc[1]/str[@name='id'][.='45']", + "//lst[@name='debug']/lst[@name='explain']" + ); + } + } - // params.put(MoreLikeThisParams.QF,new String[]{"foo_ti"}); - // String response = h.query(mltreq); - // System.out.println(response); + @Test + public void testMultifieldSimilarity() throws Exception + { + SolrCore core = h.getCore(); + MoreLikeThisHandler mlt = new MoreLikeThisHandler(); - mltreq.close(); + ModifiableSolrParams params = new ModifiableSolrParams(); + + assertU(adoc("id", "1", "name", "aaa bbb ccc", "subword", " zzz")); + assertU(adoc("id", "2", "name", " bbb ccc", "subword", " bbb zzz")); + assertU(adoc("id", "3", "name", " ccc", "subword", "aaa bbb zzz")); + assertU(adoc("id", "4", "name", " ccc", "subword", " bbb ")); + assertU(commit()); + + params.set(CommonParams.QT, "/mlt"); + 
params.set(MoreLikeThisParams.MLT, "true"); + params.set(MoreLikeThisParams.SIMILARITY_FIELDS, "name,subword"); + params.set(MoreLikeThisParams.INTERESTING_TERMS, "details"); + params.set(MoreLikeThisParams.MIN_TERM_FREQ, "1"); + params.set(MoreLikeThisParams.MIN_DOC_FREQ, "2"); + params.set(MoreLikeThisParams.BOOST, true); + params.set("indent", "true"); + + try (SolrQueryRequestBase req = new SolrQueryRequestBase(core, params) {}) { + ArrayList streams = new ArrayList<>(2); + streams.add(new ContentStreamBase.StringStream("bbb", "zzz")); + req.setContentStreams(streams); + + // Make sure we have terms from both fields in the interestingTerms array and all documents have been + // retrieved as matching. + assertQ(req, + "//lst[@name = 'interestingTerms']/float[@name = 'subword:bbb']", + "//lst[@name = 'interestingTerms']/float[@name = 'name:bbb']", + "//result[@name = 'response' and @numFound = '4']"); + } } }