diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d1275cf0f39..8a3ad22e8e1 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -429,6 +429,8 @@ New Features * SOLR-2263: Add ability for RawResponseWriter to stream binary files as well as text files. (Eric Pugh via yonik) +* SOLR-860: Add debug output for MoreLikeThis. (koji) + Optimizations ---------------------- diff --git a/solr/src/java/org/apache/solr/handler/MoreLikeThisHandler.java b/solr/src/java/org/apache/solr/handler/MoreLikeThisHandler.java index e367d8922f4..fcd41e24dd9 100644 --- a/solr/src/java/org/apache/solr/handler/MoreLikeThisHandler.java +++ b/solr/src/java/org/apache/solr/handler/MoreLikeThisHandler.java @@ -232,7 +232,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase // Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug? if (dbg == true) { try { - NamedList dbgInfo = SolrPluginUtils.doStandardDebug(req, q, mlt.mltquery, mltDocs.docList, dbgQuery, dbgResults); + NamedList dbgInfo = SolrPluginUtils.doStandardDebug(req, q, mlt.getRawMLTQuery(), mltDocs.docList, dbgQuery, dbgResults); if (null != dbgInfo) { if (null != filters) { dbgInfo.add("filter_queries",req.getParams().getParams(CommonParams.FQ)); @@ -279,8 +279,6 @@ public class MoreLikeThisHandler extends RequestHandlerBase final boolean needDocSet; Map boostFields; - Query mltquery; // expose this for debugging - public MoreLikeThisHelper( SolrParams params, SolrIndexSearcher searcher ) { this.searcher = searcher; @@ -310,9 +308,26 @@ public class MoreLikeThisHandler extends RequestHandlerBase boostFields = SolrPluginUtils.parseFieldBoosts(params.getParams(MoreLikeThisParams.QF)); } - private void setBoosts(Query mltquery) { + private Query rawMLTQuery; + private Query boostedMLTQuery; + private BooleanQuery realMLTQuery; + + public Query getRawMLTQuery(){ + return rawMLTQuery; + } + + public Query getBoostedMLTQuery(){ + return boostedMLTQuery; + } + + public Query getRealMLTQuery(){ + return realMLTQuery; + } + + private Query getBoostedQuery(Query mltquery) { + BooleanQuery boostedQuery = (BooleanQuery)mltquery.clone(); if (boostFields.size() > 0) { - List clauses = ((BooleanQuery)mltquery).clauses(); + List clauses = boostedQuery.clauses(); for( Object o : clauses ) { TermQuery q = (TermQuery)((BooleanClause)o).getQuery(); Float b = this.boostFields.get(q.getTerm().field()); @@ -321,49 +336,51 @@ public class MoreLikeThisHandler extends RequestHandlerBase } } } + return boostedQuery; } public DocListAndSet getMoreLikeThis( int id, int start, int rows, List filters, List terms, int flags ) throws IOException { Document doc = reader.document(id); - mltquery = mlt.like(id); - setBoosts(mltquery); + rawMLTQuery = mlt.like(id); + boostedMLTQuery = getBoostedQuery( rawMLTQuery ); if( terms != null ) { - fillInterestingTermsFromMLTQuery( mltquery, terms ); + fillInterestingTermsFromMLTQuery( rawMLTQuery, terms ); } // exclude current document from results - BooleanQuery mltQuery = new BooleanQuery(); - mltQuery.add(mltquery, BooleanClause.Occur.MUST); - mltQuery.add( + realMLTQuery = new BooleanQuery(); + realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST); + realMLTQuery.add( new TermQuery(new Term(uniqueKeyField.getName(), uniqueKeyField.getType().storedToIndexed(doc.getFieldable(uniqueKeyField.getName())))), BooleanClause.Occur.MUST_NOT); DocListAndSet results = new DocListAndSet(); if (this.needDocSet) { - results = searcher.getDocListAndSet(mltQuery, filters, null, start, rows, flags); + results = searcher.getDocListAndSet(realMLTQuery, filters, null, start, rows, flags); } else { - results.docList = searcher.getDocList(mltQuery, filters, null, start, rows, flags); + results.docList = searcher.getDocList(realMLTQuery, filters, null, start, rows, flags); } return results; } public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List filters, List terms, int flags ) throws IOException { - mltquery = mlt.like(reader); - setBoosts(mltquery); + rawMLTQuery = mlt.like(reader); + boostedMLTQuery = getBoostedQuery( rawMLTQuery ); if( terms != null ) { - fillInterestingTermsFromMLTQuery( mltquery, terms ); + fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms ); } DocListAndSet results = new DocListAndSet(); if (this.needDocSet) { - results = searcher.getDocListAndSet(mltquery, filters, null, start, rows, flags); + results = searcher.getDocListAndSet( boostedMLTQuery, filters, null, start, rows, flags); } else { - results.docList = searcher.getDocList(mltquery, filters, null, start, rows, flags); + results.docList = searcher.getDocList( boostedMLTQuery, filters, null, start, rows, flags); } return results; } - + + @Deprecated public NamedList getMoreLikeThese( DocList docs, int rows, int flags ) throws IOException { IndexSchema schema = searcher.getSchema(); @@ -382,7 +399,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase private void fillInterestingTermsFromMLTQuery( Query query, List terms ) { - List clauses = ((BooleanQuery)mltquery).clauses(); + List clauses = ((BooleanQuery)query).clauses(); for( Object o : clauses ) { TermQuery q = (TermQuery)((BooleanClause)o).getQuery(); InterestingTerm it = new InterestingTerm(); diff --git a/solr/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java b/solr/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java index 61c97d1faf4..8851ff7761d 100644 --- a/solr/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java @@ -23,8 +23,12 @@ import java.net.URL; import org.apache.solr.common.params.MoreLikeThisParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.handler.MoreLikeThisHandler; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; +import org.apache.solr.search.DocListAndSet; import org.apache.solr.search.SolrIndexSearcher; /** @@ -50,18 +54,59 @@ public class MoreLikeThisComponent extends SearchComponent if( p.getBool( MoreLikeThisParams.MLT, false ) ) { SolrIndexSearcher searcher = rb.req.getSearcher(); - MoreLikeThisHandler.MoreLikeThisHelper mlt - = new MoreLikeThisHandler.MoreLikeThisHelper( p, searcher ); - - int mltcount = p.getInt( MoreLikeThisParams.DOC_COUNT, 5 ); - NamedList sim = mlt.getMoreLikeThese( - rb.getResults().docList, mltcount, rb.getFieldFlags() ); + NamedList sim = getMoreLikeThese( rb, searcher, + rb.getResults().docList, rb.getFieldFlags() ); // TODO ???? add this directly to the response? rb.rsp.add( "moreLikeThis", sim ); } } + NamedList getMoreLikeThese( ResponseBuilder rb, SolrIndexSearcher searcher, + DocList docs, int flags ) throws IOException { + SolrParams p = rb.req.getParams(); + IndexSchema schema = searcher.getSchema(); + MoreLikeThisHandler.MoreLikeThisHelper mltHelper + = new MoreLikeThisHandler.MoreLikeThisHelper( p, searcher ); + NamedList mlt = new SimpleOrderedMap(); + DocIterator iterator = docs.iterator(); + + SimpleOrderedMap dbg = null; + if( rb.isDebug() ){ + dbg = new SimpleOrderedMap(); + } + + while( iterator.hasNext() ) { + int id = iterator.nextDoc(); + int rows = p.getInt( MoreLikeThisParams.DOC_COUNT, 5 ); + DocListAndSet sim = mltHelper.getMoreLikeThis( id, 0, rows, null, null, flags ); + String name = schema.printableUniqueKey( searcher.doc( id ) ); + mlt.add(name, sim.docList); + + if( dbg != null ){ + SimpleOrderedMap docDbg = new SimpleOrderedMap(); + docDbg.add( "rawMLTQuery", mltHelper.getRawMLTQuery().toString() ); + docDbg.add( "boostedMLTQuery", mltHelper.getBoostedMLTQuery().toString() ); + docDbg.add( "realMLTQuery", mltHelper.getRealMLTQuery().toString() ); + SimpleOrderedMap explains = new SimpleOrderedMap(); + DocIterator mltIte = sim.docList.iterator(); + while( mltIte.hasNext() ){ + int mltid = mltIte.nextDoc(); + String key = schema.printableUniqueKey( searcher.doc( mltid ) ); + explains.add( key, searcher.explain( mltHelper.getRealMLTQuery(), mltid ) ); + } + docDbg.add( "explain", explains ); + dbg.add( name, docDbg ); + } + } + + // add debug information + if( dbg != null ){ + rb.addDebugInfo( "moreLikeThis", dbg ); + } + return mlt; + } + ///////////////////////////////////////////// /// SolrInfoMBean //////////////////////////////////////////// diff --git a/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java b/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java index 63b1edde582..6dbae21f244 100644 --- a/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java +++ b/solr/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java @@ -94,7 +94,17 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 { assertQ("morelike this - harrison ford",mltreq ,"//result/doc[1]/int[@name='id'][.='45']"); + // test MoreLikeThis debug + params.set(CommonParams.DEBUG_QUERY, "true"); + assertQ("morelike this - harrison ford",mltreq + ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='rawMLTQuery']" + ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='boostedMLTQuery']" + ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='realMLTQuery']" + ,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/lst[@name='explain']/str[@name='45']" + ); + // test that qparser plugins work + params.remove(CommonParams.DEBUG_QUERY); params.set(CommonParams.Q, "{!field f=id}44"); assertQ(mltreq ,"//result/doc[1]/int[@name='id'][.='45']"); @@ -112,9 +122,9 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 { assertQ(mltreq ,"//result/doc[1]/int[@name='id'][.='45']"); - // test that debugging works + // test that debugging works (test for MoreLikeThis*Handler*) params.set(CommonParams.QT, "/mlt"); - params.set("debugQuery", "true"); + params.set(CommonParams.DEBUG_QUERY, "true"); assertQ(mltreq ,"//result/doc[1]/int[@name='id'][.='45']" ,"//lst[@name='debug']/lst[@name='explain']"