SOLR-595: Added support for Field level boosting in MoreLikeThis

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@668638 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2008-06-17 12:48:57 +00:00
parent 96d68357d6
commit a1b02cc58b
4 changed files with 59 additions and 2 deletions

View File

@ -288,6 +288,8 @@ New Features
51. SOLR-536: Add a DocumentObjectBinder to solrj that converts Objects to and
from SolrDocuments. (Noble Paul via ryan)
52. SOLR-595: Add support for Field level boosting in the MoreLikeThis Handler. (Tom Morton, gsingers)
Changes in runtime behavior
1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This

View File

@ -35,6 +35,7 @@ public interface MoreLikeThisParams
public final static String MAX_QUERY_TERMS = PREFIX + "maxqt";
public final static String MAX_NUM_TOKENS_PARSED = PREFIX + "maxntp";
public final static String BOOST = PREFIX + "boost"; // boost or not?
public final static String QF = PREFIX + "qf"; //boosting applied to mlt fields
// the /mlt request handler uses 'rows'
public final static String DOC_COUNT = PREFIX + "count";

View File

@ -23,8 +23,11 @@ import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
@ -37,6 +40,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.SolrParams;
@ -51,7 +55,6 @@ import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.QueryParsing;
@ -131,7 +134,6 @@ public class MoreLikeThisHandler extends RequestHandlerBase
// Matching options
boolean includeMatch = params.getBool( MoreLikeThisParams.MATCH_INCLUDE, true );
int matchOffset = params.getInt( MoreLikeThisParams.MATCH_OFFSET, 0 );
// Find the base match
Query query = QueryParsing.parseQuery(q, params.get(CommonParams.DF), params, req.getSchema());
DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags ); // only get the first one...
@ -231,6 +233,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
final IndexReader reader;
final SchemaField uniqueKeyField;
final boolean needDocSet;
Map<String,Float> boostFields;
Query mltquery; // expose this for debugging
@ -260,12 +263,27 @@ public class MoreLikeThisHandler extends RequestHandlerBase
mlt.setMaxQueryTerms( params.getInt(MoreLikeThisParams.MAX_QUERY_TERMS, MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
mlt.setMaxNumTokensParsed(params.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
mlt.setBoost( params.getBool(MoreLikeThisParams.BOOST, false ) );
boostFields = SolrPluginUtils.parseFieldBoosts(params.getParams(MoreLikeThisParams.QF));
}
private void setBoosts(Query mltquery) {
if (boostFields.size() > 0) {
List clauses = ((BooleanQuery)mltquery).clauses();
for( Object o : clauses ) {
TermQuery q = (TermQuery)((BooleanClause)o).getQuery();
Float b = this.boostFields.get(q.getTerm().field());
if (b != null) {
q.setBoost(b*q.getBoost());
}
}
}
}
public DocListAndSet getMoreLikeThis( int id, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
{
Document doc = reader.document(id);
mltquery = mlt.like(id);
setBoosts(mltquery);
if( terms != null ) {
fillInterestingTermsFromMLTQuery( mltquery, terms );
}
@ -289,6 +307,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
{
mltquery = mlt.like(reader);
setBoosts(mltquery);
if( terms != null ) {
fillInterestingTermsFromMLTQuery( mltquery, terms );
}

View File

@ -21,11 +21,15 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.util.AbstractSolrTestCase;
@ -43,6 +47,7 @@ public class MoreLikeThisHandlerTest extends AbstractSolrTestCase {
lrf = h.getRequestFactory("standard", 0, 20 );
}
public void testInterface()
{
SolrCore core = h.getCore();
@ -67,5 +72,35 @@ public class MoreLikeThisHandlerTest extends AbstractSolrTestCase {
mlt.handleRequestBody( req, new SolrQueryResponse() );
}
catch( Exception ex ) {} // expected
assertU(adoc(new String[]{"id","42","name","Tom Cruise","subword","Top Gun","subword","Risky Business","subword","The Color of Money","subword","Minority Report","subword", "Days of Thunder","subword", "Eyes Wide Shut","subword", "Far and Away"}));
assertU(adoc(new String[]{"id","43","name","Tom Hanks","subword","The Green Mile","subword","Forest Gump","subword","Philadelphia Story","subword","Big","subword","Cast Away"}));
assertU(adoc(new String[]{"id","44","name","Harrison Ford","subword","Star Wars","subword","Indiana Jones","subword","Patriot Games","subword","Regarding Henry"}));
assertU(adoc(new String[]{"id","45","name","George Harrison","subword","Yellow Submarine","subword","Help","subword","Magical Mystery Tour","subword","Sgt. Peppers Lonley Hearts Club Band"}));
assertU(adoc(new String[]{"id","46","name","Nicole Kidman","subword","Batman","subword","Days of Thunder","subword","Eyes Wide Shut","subword","Far and Away"}));
assertU(commit());
params.put(CommonParams.Q, new String[]{"id:42"});
params.put(MoreLikeThisParams.MLT, new String[]{"true"});
params.put(MoreLikeThisParams.SIMILARITY_FIELDS, new String[]{"name,subword"});
params.put(MoreLikeThisParams.INTERESTING_TERMS,new String[]{"details"});
params.put(MoreLikeThisParams.MIN_TERM_FREQ,new String[]{"1"});
params.put(MoreLikeThisParams.MIN_DOC_FREQ,new String[]{"1"});
SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, (SolrParams)mmparams);
assertQ("morelikethis - tom cruise",mltreq
,"//result/doc[1]/int[@name='id'][.='46']"
,"//result/doc[2]/int[@name='id'][.='43']");
params.put(CommonParams.Q, new String[]{"id:44"});
assertQ("morelike this - harrison ford",mltreq
,"//result/doc[1]/int[@name='id'][.='45']");
params.put(CommonParams.Q, new String[]{"id:42"});
params.put(MoreLikeThisParams.QF,new String[]{"name^5.0 subword^0.1"});
assertQ("morelikethis with weights",mltreq
,"//result/doc[1]/int[@name='id'][.='43']"
,"//result/doc[2]/int[@name='id'][.='46']");
}
}