SOLR-860: Add debug output for MoreLikeThis

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1064735 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2011-01-28 15:37:43 +00:00
parent a7a9be923e
commit 92874ddaa6
4 changed files with 102 additions and 28 deletions

View File

@ -429,6 +429,8 @@ New Features
* SOLR-2263: Add ability for RawResponseWriter to stream binary files as well as
text files. (Eric Pugh via yonik)
* SOLR-860: Add debug output for MoreLikeThis. (koji)
Optimizations
----------------------

View File

@ -232,7 +232,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
// Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug?
if (dbg == true) {
try {
NamedList<Object> dbgInfo = SolrPluginUtils.doStandardDebug(req, q, mlt.mltquery, mltDocs.docList, dbgQuery, dbgResults);
NamedList<Object> dbgInfo = SolrPluginUtils.doStandardDebug(req, q, mlt.getRawMLTQuery(), mltDocs.docList, dbgQuery, dbgResults);
if (null != dbgInfo) {
if (null != filters) {
dbgInfo.add("filter_queries",req.getParams().getParams(CommonParams.FQ));
@ -279,8 +279,6 @@ public class MoreLikeThisHandler extends RequestHandlerBase
final boolean needDocSet;
Map<String,Float> boostFields;
Query mltquery; // expose this for debugging
public MoreLikeThisHelper( SolrParams params, SolrIndexSearcher searcher )
{
this.searcher = searcher;
@ -310,9 +308,26 @@ public class MoreLikeThisHandler extends RequestHandlerBase
boostFields = SolrPluginUtils.parseFieldBoosts(params.getParams(MoreLikeThisParams.QF));
}
private void setBoosts(Query mltquery) {
private Query rawMLTQuery;
private Query boostedMLTQuery;
private BooleanQuery realMLTQuery;
public Query getRawMLTQuery(){
return rawMLTQuery;
}
public Query getBoostedMLTQuery(){
return boostedMLTQuery;
}
public Query getRealMLTQuery(){
return realMLTQuery;
}
private Query getBoostedQuery(Query mltquery) {
BooleanQuery boostedQuery = (BooleanQuery)mltquery.clone();
if (boostFields.size() > 0) {
List clauses = ((BooleanQuery)mltquery).clauses();
List clauses = boostedQuery.clauses();
for( Object o : clauses ) {
TermQuery q = (TermQuery)((BooleanClause)o).getQuery();
Float b = this.boostFields.get(q.getTerm().field());
@ -321,49 +336,51 @@ public class MoreLikeThisHandler extends RequestHandlerBase
}
}
}
return boostedQuery;
}
public DocListAndSet getMoreLikeThis( int id, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
{
Document doc = reader.document(id);
mltquery = mlt.like(id);
setBoosts(mltquery);
rawMLTQuery = mlt.like(id);
boostedMLTQuery = getBoostedQuery( rawMLTQuery );
if( terms != null ) {
fillInterestingTermsFromMLTQuery( mltquery, terms );
fillInterestingTermsFromMLTQuery( rawMLTQuery, terms );
}
// exclude current document from results
BooleanQuery mltQuery = new BooleanQuery();
mltQuery.add(mltquery, BooleanClause.Occur.MUST);
mltQuery.add(
realMLTQuery = new BooleanQuery();
realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
realMLTQuery.add(
new TermQuery(new Term(uniqueKeyField.getName(), uniqueKeyField.getType().storedToIndexed(doc.getFieldable(uniqueKeyField.getName())))),
BooleanClause.Occur.MUST_NOT);
DocListAndSet results = new DocListAndSet();
if (this.needDocSet) {
results = searcher.getDocListAndSet(mltQuery, filters, null, start, rows, flags);
results = searcher.getDocListAndSet(realMLTQuery, filters, null, start, rows, flags);
} else {
results.docList = searcher.getDocList(mltQuery, filters, null, start, rows, flags);
results.docList = searcher.getDocList(realMLTQuery, filters, null, start, rows, flags);
}
return results;
}
public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
{
mltquery = mlt.like(reader);
setBoosts(mltquery);
rawMLTQuery = mlt.like(reader);
boostedMLTQuery = getBoostedQuery( rawMLTQuery );
if( terms != null ) {
fillInterestingTermsFromMLTQuery( mltquery, terms );
fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms );
}
DocListAndSet results = new DocListAndSet();
if (this.needDocSet) {
results = searcher.getDocListAndSet(mltquery, filters, null, start, rows, flags);
results = searcher.getDocListAndSet( boostedMLTQuery, filters, null, start, rows, flags);
} else {
results.docList = searcher.getDocList(mltquery, filters, null, start, rows, flags);
results.docList = searcher.getDocList( boostedMLTQuery, filters, null, start, rows, flags);
}
return results;
}
@Deprecated
public NamedList<DocList> getMoreLikeThese( DocList docs, int rows, int flags ) throws IOException
{
IndexSchema schema = searcher.getSchema();
@ -382,7 +399,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
private void fillInterestingTermsFromMLTQuery( Query query, List<InterestingTerm> terms )
{
List clauses = ((BooleanQuery)mltquery).clauses();
List clauses = ((BooleanQuery)query).clauses();
for( Object o : clauses ) {
TermQuery q = (TermQuery)((BooleanClause)o).getQuery();
InterestingTerm it = new InterestingTerm();

View File

@ -23,8 +23,12 @@ import java.net.URL;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.MoreLikeThisHandler;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.SolrIndexSearcher;
/**
@ -50,18 +54,59 @@ public class MoreLikeThisComponent extends SearchComponent
if( p.getBool( MoreLikeThisParams.MLT, false ) ) {
SolrIndexSearcher searcher = rb.req.getSearcher();
MoreLikeThisHandler.MoreLikeThisHelper mlt
= new MoreLikeThisHandler.MoreLikeThisHelper( p, searcher );
int mltcount = p.getInt( MoreLikeThisParams.DOC_COUNT, 5 );
NamedList<DocList> sim = mlt.getMoreLikeThese(
rb.getResults().docList, mltcount, rb.getFieldFlags() );
NamedList<DocList> sim = getMoreLikeThese( rb, searcher,
rb.getResults().docList, rb.getFieldFlags() );
// TODO ???? add this directly to the response?
rb.rsp.add( "moreLikeThis", sim );
}
}
NamedList<DocList> getMoreLikeThese( ResponseBuilder rb, SolrIndexSearcher searcher,
DocList docs, int flags ) throws IOException {
SolrParams p = rb.req.getParams();
IndexSchema schema = searcher.getSchema();
MoreLikeThisHandler.MoreLikeThisHelper mltHelper
= new MoreLikeThisHandler.MoreLikeThisHelper( p, searcher );
NamedList<DocList> mlt = new SimpleOrderedMap<DocList>();
DocIterator iterator = docs.iterator();
SimpleOrderedMap<Object> dbg = null;
if( rb.isDebug() ){
dbg = new SimpleOrderedMap<Object>();
}
while( iterator.hasNext() ) {
int id = iterator.nextDoc();
int rows = p.getInt( MoreLikeThisParams.DOC_COUNT, 5 );
DocListAndSet sim = mltHelper.getMoreLikeThis( id, 0, rows, null, null, flags );
String name = schema.printableUniqueKey( searcher.doc( id ) );
mlt.add(name, sim.docList);
if( dbg != null ){
SimpleOrderedMap<Object> docDbg = new SimpleOrderedMap<Object>();
docDbg.add( "rawMLTQuery", mltHelper.getRawMLTQuery().toString() );
docDbg.add( "boostedMLTQuery", mltHelper.getBoostedMLTQuery().toString() );
docDbg.add( "realMLTQuery", mltHelper.getRealMLTQuery().toString() );
SimpleOrderedMap<Object> explains = new SimpleOrderedMap<Object>();
DocIterator mltIte = sim.docList.iterator();
while( mltIte.hasNext() ){
int mltid = mltIte.nextDoc();
String key = schema.printableUniqueKey( searcher.doc( mltid ) );
explains.add( key, searcher.explain( mltHelper.getRealMLTQuery(), mltid ) );
}
docDbg.add( "explain", explains );
dbg.add( name, docDbg );
}
}
// add debug information
if( dbg != null ){
rb.addDebugInfo( "moreLikeThis", dbg );
}
return mlt;
}
/////////////////////////////////////////////
/// SolrInfoMBean
////////////////////////////////////////////

View File

@ -94,7 +94,17 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 {
assertQ("morelike this - harrison ford",mltreq
,"//result/doc[1]/int[@name='id'][.='45']");
// test MoreLikeThis debug
params.set(CommonParams.DEBUG_QUERY, "true");
assertQ("morelike this - harrison ford",mltreq
,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='rawMLTQuery']"
,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='boostedMLTQuery']"
,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='realMLTQuery']"
,"//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/lst[@name='explain']/str[@name='45']"
);
// test that qparser plugins work
params.remove(CommonParams.DEBUG_QUERY);
params.set(CommonParams.Q, "{!field f=id}44");
assertQ(mltreq
,"//result/doc[1]/int[@name='id'][.='45']");
@ -112,9 +122,9 @@ public class MoreLikeThisHandlerTest extends SolrTestCaseJ4 {
assertQ(mltreq
,"//result/doc[1]/int[@name='id'][.='45']");
// test that debugging works
// test that debugging works (test for MoreLikeThis*Handler*)
params.set(CommonParams.QT, "/mlt");
params.set("debugQuery", "true");
params.set(CommonParams.DEBUG_QUERY, "true");
assertQ(mltreq
,"//result/doc[1]/int[@name='id'][.='45']"
,"//lst[@name='debug']/lst[@name='explain']"