mirror of https://github.com/apache/lucene.git
SOLR-595: Added support for Field level boosting in MoreLikeThis
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@668638 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
96d68357d6
commit
a1b02cc58b
|
@ -288,6 +288,8 @@ New Features
|
||||||
|
|
||||||
51. SOLR-536: Add a DocumentObjectBinder to solrj that converts Objects to and
|
51. SOLR-536: Add a DocumentObjectBinder to solrj that converts Objects to and
|
||||||
from SolrDocuments. (Noble Paul via ryan)
|
from SolrDocuments. (Noble Paul via ryan)
|
||||||
|
|
||||||
|
52. SOLR-595: Add support for Field level boosting in the MoreLikeThis Handler. (Tom Morton, gsingers)
|
||||||
|
|
||||||
Changes in runtime behavior
|
Changes in runtime behavior
|
||||||
1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This
|
1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This
|
||||||
|
|
|
@ -35,6 +35,7 @@ public interface MoreLikeThisParams
|
||||||
public final static String MAX_QUERY_TERMS = PREFIX + "maxqt";
|
public final static String MAX_QUERY_TERMS = PREFIX + "maxqt";
|
||||||
public final static String MAX_NUM_TOKENS_PARSED = PREFIX + "maxntp";
|
public final static String MAX_NUM_TOKENS_PARSED = PREFIX + "maxntp";
|
||||||
public final static String BOOST = PREFIX + "boost"; // boost or not?
|
public final static String BOOST = PREFIX + "boost"; // boost or not?
|
||||||
|
public final static String QF = PREFIX + "qf"; //boosting applied to mlt fields
|
||||||
|
|
||||||
// the /mlt request handler uses 'rows'
|
// the /mlt request handler uses 'rows'
|
||||||
public final static String DOC_COUNT = PREFIX + "count";
|
public final static String DOC_COUNT = PREFIX + "count";
|
||||||
|
|
|
@ -23,8 +23,11 @@ import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -37,6 +40,7 @@ import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.similar.MoreLikeThis;
|
import org.apache.lucene.search.similar.MoreLikeThis;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
|
import org.apache.solr.common.params.DisMaxParams;
|
||||||
import org.apache.solr.common.params.FacetParams;
|
import org.apache.solr.common.params.FacetParams;
|
||||||
import org.apache.solr.common.params.MoreLikeThisParams;
|
import org.apache.solr.common.params.MoreLikeThisParams;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
@ -51,7 +55,6 @@ import org.apache.solr.request.SolrQueryResponse;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.search.DocIterator;
|
import org.apache.solr.search.DocIterator;
|
||||||
import org.apache.solr.search.DocSet;
|
|
||||||
import org.apache.solr.search.DocList;
|
import org.apache.solr.search.DocList;
|
||||||
import org.apache.solr.search.DocListAndSet;
|
import org.apache.solr.search.DocListAndSet;
|
||||||
import org.apache.solr.search.QueryParsing;
|
import org.apache.solr.search.QueryParsing;
|
||||||
|
@ -131,7 +134,6 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
||||||
// Matching options
|
// Matching options
|
||||||
boolean includeMatch = params.getBool( MoreLikeThisParams.MATCH_INCLUDE, true );
|
boolean includeMatch = params.getBool( MoreLikeThisParams.MATCH_INCLUDE, true );
|
||||||
int matchOffset = params.getInt( MoreLikeThisParams.MATCH_OFFSET, 0 );
|
int matchOffset = params.getInt( MoreLikeThisParams.MATCH_OFFSET, 0 );
|
||||||
|
|
||||||
// Find the base match
|
// Find the base match
|
||||||
Query query = QueryParsing.parseQuery(q, params.get(CommonParams.DF), params, req.getSchema());
|
Query query = QueryParsing.parseQuery(q, params.get(CommonParams.DF), params, req.getSchema());
|
||||||
DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags ); // only get the first one...
|
DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags ); // only get the first one...
|
||||||
|
@ -231,6 +233,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
||||||
final IndexReader reader;
|
final IndexReader reader;
|
||||||
final SchemaField uniqueKeyField;
|
final SchemaField uniqueKeyField;
|
||||||
final boolean needDocSet;
|
final boolean needDocSet;
|
||||||
|
Map<String,Float> boostFields;
|
||||||
|
|
||||||
Query mltquery; // expose this for debugging
|
Query mltquery; // expose this for debugging
|
||||||
|
|
||||||
|
@ -260,12 +263,27 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
||||||
mlt.setMaxQueryTerms( params.getInt(MoreLikeThisParams.MAX_QUERY_TERMS, MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
|
mlt.setMaxQueryTerms( params.getInt(MoreLikeThisParams.MAX_QUERY_TERMS, MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
|
||||||
mlt.setMaxNumTokensParsed(params.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
|
mlt.setMaxNumTokensParsed(params.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
|
||||||
mlt.setBoost( params.getBool(MoreLikeThisParams.BOOST, false ) );
|
mlt.setBoost( params.getBool(MoreLikeThisParams.BOOST, false ) );
|
||||||
|
boostFields = SolrPluginUtils.parseFieldBoosts(params.getParams(MoreLikeThisParams.QF));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setBoosts(Query mltquery) {
|
||||||
|
if (boostFields.size() > 0) {
|
||||||
|
List clauses = ((BooleanQuery)mltquery).clauses();
|
||||||
|
for( Object o : clauses ) {
|
||||||
|
TermQuery q = (TermQuery)((BooleanClause)o).getQuery();
|
||||||
|
Float b = this.boostFields.get(q.getTerm().field());
|
||||||
|
if (b != null) {
|
||||||
|
q.setBoost(b*q.getBoost());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public DocListAndSet getMoreLikeThis( int id, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
|
public DocListAndSet getMoreLikeThis( int id, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
|
||||||
{
|
{
|
||||||
Document doc = reader.document(id);
|
Document doc = reader.document(id);
|
||||||
mltquery = mlt.like(id);
|
mltquery = mlt.like(id);
|
||||||
|
setBoosts(mltquery);
|
||||||
if( terms != null ) {
|
if( terms != null ) {
|
||||||
fillInterestingTermsFromMLTQuery( mltquery, terms );
|
fillInterestingTermsFromMLTQuery( mltquery, terms );
|
||||||
}
|
}
|
||||||
|
@ -289,6 +307,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
|
||||||
public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
|
public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
|
||||||
{
|
{
|
||||||
mltquery = mlt.like(reader);
|
mltquery = mlt.like(reader);
|
||||||
|
setBoosts(mltquery);
|
||||||
if( terms != null ) {
|
if( terms != null ) {
|
||||||
fillInterestingTermsFromMLTQuery( mltquery, terms );
|
fillInterestingTermsFromMLTQuery( mltquery, terms );
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,11 +21,15 @@ import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.solr.common.params.CommonParams;
|
||||||
|
import org.apache.solr.common.params.MoreLikeThisParams;
|
||||||
import org.apache.solr.common.params.MultiMapSolrParams;
|
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
import org.apache.solr.common.params.SolrParams;
|
||||||
import org.apache.solr.common.util.ContentStream;
|
import org.apache.solr.common.util.ContentStream;
|
||||||
import org.apache.solr.common.util.ContentStreamBase;
|
import org.apache.solr.common.util.ContentStreamBase;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
import org.apache.solr.request.SolrQueryRequestBase;
|
import org.apache.solr.request.SolrQueryRequestBase;
|
||||||
import org.apache.solr.request.SolrQueryResponse;
|
import org.apache.solr.request.SolrQueryResponse;
|
||||||
import org.apache.solr.util.AbstractSolrTestCase;
|
import org.apache.solr.util.AbstractSolrTestCase;
|
||||||
|
@ -43,6 +47,7 @@ public class MoreLikeThisHandlerTest extends AbstractSolrTestCase {
|
||||||
lrf = h.getRequestFactory("standard", 0, 20 );
|
lrf = h.getRequestFactory("standard", 0, 20 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testInterface()
|
public void testInterface()
|
||||||
{
|
{
|
||||||
SolrCore core = h.getCore();
|
SolrCore core = h.getCore();
|
||||||
|
@ -67,5 +72,35 @@ public class MoreLikeThisHandlerTest extends AbstractSolrTestCase {
|
||||||
mlt.handleRequestBody( req, new SolrQueryResponse() );
|
mlt.handleRequestBody( req, new SolrQueryResponse() );
|
||||||
}
|
}
|
||||||
catch( Exception ex ) {} // expected
|
catch( Exception ex ) {} // expected
|
||||||
|
|
||||||
|
assertU(adoc(new String[]{"id","42","name","Tom Cruise","subword","Top Gun","subword","Risky Business","subword","The Color of Money","subword","Minority Report","subword", "Days of Thunder","subword", "Eyes Wide Shut","subword", "Far and Away"}));
|
||||||
|
assertU(adoc(new String[]{"id","43","name","Tom Hanks","subword","The Green Mile","subword","Forest Gump","subword","Philadelphia Story","subword","Big","subword","Cast Away"}));
|
||||||
|
assertU(adoc(new String[]{"id","44","name","Harrison Ford","subword","Star Wars","subword","Indiana Jones","subword","Patriot Games","subword","Regarding Henry"}));
|
||||||
|
assertU(adoc(new String[]{"id","45","name","George Harrison","subword","Yellow Submarine","subword","Help","subword","Magical Mystery Tour","subword","Sgt. Peppers Lonley Hearts Club Band"}));
|
||||||
|
assertU(adoc(new String[]{"id","46","name","Nicole Kidman","subword","Batman","subword","Days of Thunder","subword","Eyes Wide Shut","subword","Far and Away"}));
|
||||||
|
assertU(commit());
|
||||||
|
|
||||||
|
params.put(CommonParams.Q, new String[]{"id:42"});
|
||||||
|
params.put(MoreLikeThisParams.MLT, new String[]{"true"});
|
||||||
|
params.put(MoreLikeThisParams.SIMILARITY_FIELDS, new String[]{"name,subword"});
|
||||||
|
params.put(MoreLikeThisParams.INTERESTING_TERMS,new String[]{"details"});
|
||||||
|
params.put(MoreLikeThisParams.MIN_TERM_FREQ,new String[]{"1"});
|
||||||
|
params.put(MoreLikeThisParams.MIN_DOC_FREQ,new String[]{"1"});
|
||||||
|
|
||||||
|
SolrQueryRequest mltreq = new LocalSolrQueryRequest( core, (SolrParams)mmparams);
|
||||||
|
assertQ("morelikethis - tom cruise",mltreq
|
||||||
|
,"//result/doc[1]/int[@name='id'][.='46']"
|
||||||
|
,"//result/doc[2]/int[@name='id'][.='43']");
|
||||||
|
|
||||||
|
params.put(CommonParams.Q, new String[]{"id:44"});
|
||||||
|
assertQ("morelike this - harrison ford",mltreq
|
||||||
|
,"//result/doc[1]/int[@name='id'][.='45']");
|
||||||
|
|
||||||
|
params.put(CommonParams.Q, new String[]{"id:42"});
|
||||||
|
params.put(MoreLikeThisParams.QF,new String[]{"name^5.0 subword^0.1"});
|
||||||
|
assertQ("morelikethis with weights",mltreq
|
||||||
|
,"//result/doc[1]/int[@name='id'][.='43']"
|
||||||
|
,"//result/doc[2]/int[@name='id'][.='46']");
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue