mirror of https://github.com/apache/lucene.git
SOLR-595: Added support for Field level boosting in MoreLikeThis
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@668638 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
96d68357d6
commit
a1b02cc58b
|
@ -289,6 +289,8 @@ New Features
|
|||
51. SOLR-536: Add a DocumentObjectBinder to solrj that converts Objects to and
|
||||
from SolrDocuments. (Noble Paul via ryan)
|
||||
|
||||
52. SOLR-595: Add support for Field level boosting in the MoreLikeThis Handler. (Tom Morton, gsingers)
|
||||
|
||||
Changes in runtime behavior
|
||||
1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This
|
||||
removes the maxBufferedDeletes parameter added by SOLR-310 as Lucene
|
||||
|
|
|
@ -35,6 +35,7 @@ public interface MoreLikeThisParams
|
|||
public final static String MAX_QUERY_TERMS = PREFIX + "maxqt";
|
||||
public final static String MAX_NUM_TOKENS_PARSED = PREFIX + "maxntp";
|
||||
public final static String BOOST = PREFIX + "boost"; // boost or not?
|
||||
public final static String QF = PREFIX + "qf"; //boosting applied to mlt fields
|
||||
|
||||
// the /mlt request handler uses 'rows'
|
||||
public final static String DOC_COUNT = PREFIX + "count";
|
||||
|
|
|
@ -23,8 +23,11 @@ import java.net.MalformedURLException;
|
|||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -37,6 +40,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.search.similar.MoreLikeThis;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.DisMaxParams;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.params.MoreLikeThisParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
@ -51,7 +55,6 @@ import org.apache.solr.request.SolrQueryResponse;
|
|||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.DocListAndSet;
|
||||
import org.apache.solr.search.QueryParsing;
|
||||
|
@ -131,7 +134,6 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
|||
// Matching options
|
||||
boolean includeMatch = params.getBool( MoreLikeThisParams.MATCH_INCLUDE, true );
|
||||
int matchOffset = params.getInt( MoreLikeThisParams.MATCH_OFFSET, 0 );
|
||||
|
||||
// Find the base match
|
||||
Query query = QueryParsing.parseQuery(q, params.get(CommonParams.DF), params, req.getSchema());
|
||||
DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags ); // only get the first one...
|
||||
|
@ -231,6 +233,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
|||
final IndexReader reader;
|
||||
final SchemaField uniqueKeyField;
|
||||
final boolean needDocSet;
|
||||
Map<String,Float> boostFields;
|
||||
|
||||
Query mltquery; // expose this for debugging
|
||||
|
||||
|
@ -260,12 +263,27 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
|||
mlt.setMaxQueryTerms( params.getInt(MoreLikeThisParams.MAX_QUERY_TERMS, MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
|
||||
mlt.setMaxNumTokensParsed(params.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
|
||||
mlt.setBoost( params.getBool(MoreLikeThisParams.BOOST, false ) );
|
||||
boostFields = SolrPluginUtils.parseFieldBoosts(params.getParams(MoreLikeThisParams.QF));
|
||||
}
|
||||
|
||||
private void setBoosts(Query mltquery) {
|
||||
if (boostFields.size() > 0) {
|
||||
List clauses = ((BooleanQuery)mltquery).clauses();
|
||||
for( Object o : clauses ) {
|
||||
TermQuery q = (TermQuery)((BooleanClause)o).getQuery();
|
||||
Float b = this.boostFields.get(q.getTerm().field());
|
||||
if (b != null) {
|
||||
q.setBoost(b*q.getBoost());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public DocListAndSet getMoreLikeThis( int id, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
|
||||
{
|
||||
Document doc = reader.document(id);
|
||||
mltquery = mlt.like(id);
|
||||
setBoosts(mltquery);
|
||||
if( terms != null ) {
|
||||
fillInterestingTermsFromMLTQuery( mltquery, terms );
|
||||
}
|
||||
|
@ -289,6 +307,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
|||
public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
|
||||
{
|
||||
mltquery = mlt.like(reader);
|
||||
setBoosts(mltquery);
|
||||
if( terms != null ) {
|
||||
fillInterestingTermsFromMLTQuery( mltquery, terms );
|
||||
}
|
||||
|
|
|
@ -21,11 +21,15 @@ import java.util.ArrayList;
|
|||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.MoreLikeThisParams;
|
||||
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequestBase;
|
||||
import org.apache.solr.request.SolrQueryResponse;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
|
@ -43,6 +47,7 @@ public class MoreLikeThisHandlerTest extends AbstractSolrTestCase {
|
|||
lrf = h.getRequestFactory("standard", 0, 20 );
|
||||
}
|
||||
|
||||
|
||||
public void testInterface()
|
||||
{
|
||||
SolrCore core = h.getCore();
|
||||
|
@ -67,5 +72,35 @@ public class MoreLikeThisHandlerTest extends AbstractSolrTestCase {
|
|||
mlt.handleRequestBody( req, new SolrQueryResponse() );
|
||||
}
|
||||
catch( Exception ex ) {} // expected
|
||||
|
||||
assertU(adoc(new String[]{"id","42","name","Tom Cruise","subword","Top Gun","subword","Risky Business","subword","The Color of Money","subword","Minority Report","subword", "Days of Thunder","subword", "Eyes Wide Shut","subword", "Far and Away"}));
|
||||
assertU(adoc(new String[]{"id","43","name","Tom Hanks","subword","The Green Mile","subword","Forest Gump","subword","Philadelphia Story","subword","Big","subword","Cast Away"}));
|
||||
assertU(adoc(new String[]{"id","44","name","Harrison Ford","subword","Star Wars","subword","Indiana Jones","subword","Patriot Games","subword","Regarding Henry"}));
|
||||
assertU(adoc(new String[]{"id","45","name","George Harrison","subword","Yellow Submarine","subword","Help","subword","Magical Mystery Tour","subword","Sgt. Peppers Lonley Hearts Club Band"}));
|
||||
assertU(adoc(new String[]{"id","46","name","Nicole Kidman","subword","Batman","subword","Days of Thunder","subword","Eyes Wide Shut","subword","Far and Away"}));
|
||||
assertU(commit());
|
||||
|
||||
params.put(CommonParams.Q, new String[]{"id:42"});
|
||||
params.put(MoreLikeThisParams.MLT, new String[]{"true"});
|
||||
params.put(MoreLikeThisParams.SIMILARITY_FIELDS, new String[]{"name,subword"});
|
||||
params.put(MoreLikeThisParams.INTERESTING_TERMS,new String[]{"details"});
|
||||
params.put(MoreLikeThisParams.MIN_TERM_FREQ,new String[]{"1"});
|
||||
params.put(MoreLikeThisParams.MIN_DOC_FREQ,new String[]{"1"});
|
||||
|
||||
SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, (SolrParams)mmparams);
|
||||
assertQ("morelikethis - tom cruise",mltreq
|
||||
,"//result/doc[1]/int[@name='id'][.='46']"
|
||||
,"//result/doc[2]/int[@name='id'][.='43']");
|
||||
|
||||
params.put(CommonParams.Q, new String[]{"id:44"});
|
||||
assertQ("morelike this - harrison ford",mltreq
|
||||
,"//result/doc[1]/int[@name='id'][.='45']");
|
||||
|
||||
params.put(CommonParams.Q, new String[]{"id:42"});
|
||||
params.put(MoreLikeThisParams.QF,new String[]{"name^5.0 subword^0.1"});
|
||||
assertQ("morelikethis with weights",mltreq
|
||||
,"//result/doc[1]/int[@name='id'][.='43']"
|
||||
,"//result/doc[2]/int[@name='id'][.='46']");
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue