From a1b02cc58b0146cdaf379157a309656c9cb46185 Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Tue, 17 Jun 2008 12:48:57 +0000 Subject: [PATCH] SOLR-595: Added support for Field level boosting in MoreLikeThis git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@668638 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 ++ .../common/params/MoreLikeThisParams.java | 1 + .../solr/handler/MoreLikeThisHandler.java | 23 ++++++++++-- .../solr/handler/MoreLikeThisHandlerTest.java | 35 +++++++++++++++++++ 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index f11ff89c9f7..0a6d6fd484c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -288,6 +288,8 @@ New Features 51. SOLR-536: Add a DocumentObjectBinder to solrj that converts Objects to and from SolrDocuments. (Noble Paul via ryan) + +52. SOLR-595: Add support for Field level boosting in the MoreLikeThis Handler. (Tom Morton, gsingers) Changes in runtime behavior 1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This diff --git a/src/java/org/apache/solr/common/params/MoreLikeThisParams.java b/src/java/org/apache/solr/common/params/MoreLikeThisParams.java index 1cee9497501..9ca6ad3ecf8 100644 --- a/src/java/org/apache/solr/common/params/MoreLikeThisParams.java +++ b/src/java/org/apache/solr/common/params/MoreLikeThisParams.java @@ -35,6 +35,7 @@ public interface MoreLikeThisParams public final static String MAX_QUERY_TERMS = PREFIX + "maxqt"; public final static String MAX_NUM_TOKENS_PARSED = PREFIX + "maxntp"; public final static String BOOST = PREFIX + "boost"; // boost or not? + public final static String QF = PREFIX + "qf"; //boosting applied to mlt fields // the /mlt request handler uses 'rows' public final static String DOC_COUNT = PREFIX + "count"; diff --git a/src/java/org/apache/solr/handler/MoreLikeThisHandler.java b/src/java/org/apache/solr/handler/MoreLikeThisHandler.java index 10d1d5cdcf4..4af654e009d 100644 --- a/src/java/org/apache/solr/handler/MoreLikeThisHandler.java +++ b/src/java/org/apache/solr/handler/MoreLikeThisHandler.java @@ -23,8 +23,11 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.Comparator; +import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.regex.Pattern; import org.apache.lucene.document.Document; @@ -37,6 +40,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.similar.MoreLikeThis; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.MoreLikeThisParams; import org.apache.solr.common.params.SolrParams; @@ -51,7 +55,6 @@ import org.apache.solr.request.SolrQueryResponse; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocIterator; -import org.apache.solr.search.DocSet; import org.apache.solr.search.DocList; import org.apache.solr.search.DocListAndSet; import org.apache.solr.search.QueryParsing; @@ -131,7 +134,6 @@ public class MoreLikeThisHandler extends RequestHandlerBase // Matching options boolean includeMatch = params.getBool( MoreLikeThisParams.MATCH_INCLUDE, true ); int matchOffset = params.getInt( MoreLikeThisParams.MATCH_OFFSET, 0 ); - // Find the base match Query query = QueryParsing.parseQuery(q, params.get(CommonParams.DF), params, req.getSchema()); DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags ); // only get the first one... @@ -231,6 +233,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase final IndexReader reader; final SchemaField uniqueKeyField; final boolean needDocSet; + Map boostFields; Query mltquery; // expose this for debugging @@ -260,12 +263,27 @@ public class MoreLikeThisHandler extends RequestHandlerBase mlt.setMaxQueryTerms( params.getInt(MoreLikeThisParams.MAX_QUERY_TERMS, MoreLikeThis.DEFAULT_MAX_QUERY_TERMS)); mlt.setMaxNumTokensParsed(params.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED)); mlt.setBoost( params.getBool(MoreLikeThisParams.BOOST, false ) ); + boostFields = SolrPluginUtils.parseFieldBoosts(params.getParams(MoreLikeThisParams.QF)); + } + + private void setBoosts(Query mltquery) { + if (boostFields.size() > 0) { + List clauses = ((BooleanQuery)mltquery).clauses(); + for( Object o : clauses ) { + TermQuery q = (TermQuery)((BooleanClause)o).getQuery(); + Float b = this.boostFields.get(q.getTerm().field()); + if (b != null) { + q.setBoost(b*q.getBoost()); + } + } + } } public DocListAndSet getMoreLikeThis( int id, int start, int rows, List filters, List terms, int flags ) throws IOException { Document doc = reader.document(id); mltquery = mlt.like(id); + setBoosts(mltquery); if( terms != null ) { fillInterestingTermsFromMLTQuery( mltquery, terms ); } @@ -289,6 +307,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List filters, List terms, int flags ) throws IOException { mltquery = mlt.like(reader); + setBoosts(mltquery); if( terms != null ) { fillInterestingTermsFromMLTQuery( mltquery, terms ); } diff --git a/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java b/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java index 01b76ddc0d8..adcf190a0dc 100644 --- a/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java +++ b/src/test/org/apache/solr/handler/MoreLikeThisHandlerTest.java @@ -21,11 +21,15 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.Map; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.MoreLikeThisParams; import org.apache.solr.common.params.MultiMapSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ContentStream; import org.apache.solr.common.util.ContentStreamBase; import org.apache.solr.core.SolrCore; +import org.apache.solr.request.LocalSolrQueryRequest; +import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequestBase; import org.apache.solr.request.SolrQueryResponse; import org.apache.solr.util.AbstractSolrTestCase; @@ -43,6 +47,7 @@ public class MoreLikeThisHandlerTest extends AbstractSolrTestCase { lrf = h.getRequestFactory("standard", 0, 20 ); } + public void testInterface() { SolrCore core = h.getCore(); @@ -67,5 +72,35 @@ public class MoreLikeThisHandlerTest extends AbstractSolrTestCase { mlt.handleRequestBody( req, new SolrQueryResponse() ); } catch( Exception ex ) {} // expected + + assertU(adoc(new String[]{"id","42","name","Tom Cruise","subword","Top Gun","subword","Risky Business","subword","The Color of Money","subword","Minority Report","subword", "Days of Thunder","subword", "Eyes Wide Shut","subword", "Far and Away"})); + assertU(adoc(new String[]{"id","43","name","Tom Hanks","subword","The Green Mile","subword","Forest Gump","subword","Philadelphia Story","subword","Big","subword","Cast Away"})); + assertU(adoc(new String[]{"id","44","name","Harrison Ford","subword","Star Wars","subword","Indiana Jones","subword","Patriot Games","subword","Regarding Henry"})); + assertU(adoc(new String[]{"id","45","name","George Harrison","subword","Yellow Submarine","subword","Help","subword","Magical Mystery Tour","subword","Sgt. Peppers Lonley Hearts Club Band"})); + assertU(adoc(new String[]{"id","46","name","Nicole Kidman","subword","Batman","subword","Days of Thunder","subword","Eyes Wide Shut","subword","Far and Away"})); + assertU(commit()); + + params.put(CommonParams.Q, new String[]{"id:42"}); + params.put(MoreLikeThisParams.MLT, new String[]{"true"}); + params.put(MoreLikeThisParams.SIMILARITY_FIELDS, new String[]{"name,subword"}); + params.put(MoreLikeThisParams.INTERESTING_TERMS,new String[]{"details"}); + params.put(MoreLikeThisParams.MIN_TERM_FREQ,new String[]{"1"}); + params.put(MoreLikeThisParams.MIN_DOC_FREQ,new String[]{"1"}); + + SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, (SolrParams)mmparams); + assertQ("morelikethis - tom cruise",mltreq + ,"//result/doc[1]/int[@name='id'][.='46']" + ,"//result/doc[2]/int[@name='id'][.='43']"); + + params.put(CommonParams.Q, new String[]{"id:44"}); + assertQ("morelike this - harrison ford",mltreq + ,"//result/doc[1]/int[@name='id'][.='45']"); + + params.put(CommonParams.Q, new String[]{"id:42"}); + params.put(MoreLikeThisParams.QF,new String[]{"name^5.0 subword^0.1"}); + assertQ("morelikethis with weights",mltreq + ,"//result/doc[1]/int[@name='id'][.='43']" + ,"//result/doc[2]/int[@name='id'][.='46']"); + } }