SOLR-1085: Add support for MoreLikeThis queries and responses in SolrJ client

(cherry picked from commit 2172f3e)
This commit is contained in:
Shalin Shekhar Mangar 2016-10-27 17:41:25 +05:30
parent e047543ad5
commit 6400b9c3cb
7 changed files with 300 additions and 12 deletions

View File

@ -174,6 +174,9 @@ New Features
* SOLR-9559: Add ExecutorStream to execute stored Streaming Expressions (Joel Bernstein)
* SOLR-1085: Add support for MoreLikeThis queries and responses in SolrJ client.
(Maurice Jumelet, Bill Mitchell, Cao Manh Dat via shalin)
Bug Fixes
----------------------
@ -245,7 +248,7 @@ Bug Fixes
* SOLR-2094: XPathEntityProcessor should reinitialize the XPathRecordReader instance if
the 'forEach' or 'xpath' attributes are templates & it is not a root entity (Cao Manh Dat, noble)
Optimizations
----------------------

View File

@ -170,7 +170,7 @@ public class MoreLikeThisComponent extends SearchComponent {
&& rb.req.getParams().getBool(COMPONENT_NAME, false)) {
Map<Object,SolrDocumentList> tempResults = new LinkedHashMap<>();
int mltcount = rb.req.getParams().getInt(MoreLikeThisParams.DOC_COUNT, 5);
int mltcount = rb.req.getParams().getInt(MoreLikeThisParams.DOC_COUNT, MoreLikeThisParams.DEFAULT_DOC_COUNT);
String keyName = rb.req.getSchema().getUniqueKeyField().getName();
for (ShardRequest sreq : rb.finished) {

View File

@ -27,6 +27,7 @@ import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.StatsParams;
import org.apache.solr.common.params.TermsParams;
@ -801,6 +802,253 @@ public class SolrQuery extends ModifiableSolrParams
return this;
}
/**
 * Adds one field to the fields used for MoreLikeThis and switches
 * MoreLikeThis on as a side effect.
 *
 * @param field the name of the field to be added
 * @return this
 */
public SolrQuery addMoreLikeThisField(String field) {
  // enable first, so the query is valid for MoreLikeThis as soon as a field is present
  setMoreLikeThis(true);
  final SolrQuery updated = addValueToParam(MoreLikeThisParams.SIMILARITY_FIELDS, field);
  return updated;
}
/**
 * Replaces the fields used for MoreLikeThis with the given ones, stored as
 * a single comma separated value, and enables MoreLikeThis.
 * <p>
 * Passing {@code null} or no fields removes the similarity fields and
 * disables MoreLikeThis via {@link #setMoreLikeThis(boolean)}.
 *
 * @param fields the names of the fields to use; may be {@code null} or empty
 * @return this
 */
public SolrQuery setMoreLikeThisFields(String... fields) {
  final boolean nothingGiven = (fields == null) || (fields.length == 0);
  if (nothingGiven) {
    remove(MoreLikeThisParams.SIMILARITY_FIELDS);
    setMoreLikeThis(false);
    return this;
  }

  final StringBuilder joined = new StringBuilder();
  for (int idx = 0; idx < fields.length; idx++) {
    if (idx > 0) {
      joined.append(',');
    }
    joined.append(fields[idx]);
  }
  set(MoreLikeThisParams.SIMILARITY_FIELDS, joined.toString());
  setMoreLikeThis(true);
  return this;
}
/**
 * Gets the fields used to compute similarity.
 * <p>
 * Every value of the {@link MoreLikeThisParams#SIMILARITY_FIELDS} parameter
 * is taken into account, so the fields are reported both when they were given
 * as one comma separated value and when the parameter was specified several
 * times with one field each.
 *
 * @return an array with the fields used to compute similarity, or
 *         {@code null} if the parameter is not set or holds only empty values
 */
public String[] getMoreLikeThisFields() {
  // get(...) would only expose the first value of a multi-valued parameter,
  // silently dropping fields that were supplied as separate values.
  final String[] values = this.getParams(MoreLikeThisParams.SIMILARITY_FIELDS);
  if (values == null) {
    return null;
  }

  final java.util.List<String> fields = new java.util.ArrayList<>();
  boolean sawValue = false;
  for (String value : values) {
    if (value == null || value.length() == 0) {
      continue;
    }
    sawValue = true;
    for (String field : value.split(",")) {
      fields.add(field);
    }
  }
  // keep the historical contract: null (not an empty array) when nothing usable is set
  return sawValue ? fields.toArray(new String[fields.size()]) : null;
}
/**
 * Sets the minimum term frequency: terms occurring less often than this in
 * the source doc will be ignored.
 *
 * @param mintf the minimum term frequency
 * @return this
 */
public SolrQuery setMoreLikeThisMinTermFreq(int mintf) {
  final String param = MoreLikeThisParams.MIN_TERM_FREQ;
  set(param, mintf);
  return this;
}
/**
 * Gets the minimum term frequency, i.e. the frequency below which terms
 * will be ignored in the source doc.
 *
 * @return the configured minimum term frequency, or 2 if none is set
 */
public int getMoreLikeThisMinTermFreq() {
  final int fallback = 2;
  return getInt(MoreLikeThisParams.MIN_TERM_FREQ, fallback);
}
/**
 * Sets the minimum document frequency: words which do not occur in at
 * least this many docs will be ignored.
 *
 * @param mindf the minimum document frequency
 * @return this
 */
public SolrQuery setMoreLikeThisMinDocFreq(int mindf) {
  final String param = MoreLikeThisParams.MIN_DOC_FREQ;
  set(param, mindf);
  return this;
}
/**
 * Gets the minimum document frequency, i.e. the number of docs a word must
 * occur in so that it is not ignored.
 *
 * @return the configured minimum document frequency, or 5 if none is set
 */
public int getMoreLikeThisMinDocFreq() {
  final int fallback = 5;
  return getInt(MoreLikeThisParams.MIN_DOC_FREQ, fallback);
}
/**
 * Sets the minimum word length; shorter words will be ignored.
 *
 * @param minwl the minimum word length
 * @return this
 */
public SolrQuery setMoreLikeThisMinWordLen(int minwl) {
  final String param = MoreLikeThisParams.MIN_WORD_LEN;
  set(param, minwl);
  return this;
}
/**
 * Gets the minimum word length below which words will be ignored.
 *
 * @return the configured minimum word length, or 0 if none is set
 */
public int getMoreLikeThisMinWordLen() {
  final int fallback = 0;
  return getInt(MoreLikeThisParams.MIN_WORD_LEN, fallback);
}
/**
 * Sets the maximum word length; longer words will be ignored.
 *
 * @param maxwl the maximum word length
 * @return this
 */
public SolrQuery setMoreLikeThisMaxWordLen(int maxwl) {
  final String param = MoreLikeThisParams.MAX_WORD_LEN;
  set(param, maxwl);
  return this;
}
/**
 * Gets the maximum word length above which words will be ignored.
 *
 * @return the configured maximum word length, or 0 if none is set
 */
public int getMoreLikeThisMaxWordLen() {
  final int fallback = 0;
  return getInt(MoreLikeThisParams.MAX_WORD_LEN, fallback);
}
/**
 * Sets the upper bound on the number of query terms included in any
 * generated query.
 *
 * @param maxqt the maximum number of query terms
 * @return this
 */
public SolrQuery setMoreLikeThisMaxQueryTerms(int maxqt) {
  final String param = MoreLikeThisParams.MAX_QUERY_TERMS;
  set(param, maxqt);
  return this;
}
/**
 * Gets the upper bound on the number of query terms included in any
 * generated query.
 *
 * @return the configured maximum number of query terms, or 25 if none is set
 */
public int getMoreLikeThisMaxQueryTerms() {
  final int fallback = 25;
  return getInt(MoreLikeThisParams.MAX_QUERY_TERMS, fallback);
}
/**
 * Sets how many tokens are parsed at most in each example doc field that
 * is not stored with TermVector support.
 *
 * @param maxntp the maximum number of tokens to parse
 * @return this
 */
public SolrQuery setMoreLikeThisMaxTokensParsed(int maxntp) {
  final String param = MoreLikeThisParams.MAX_NUM_TOKENS_PARSED;
  set(param, maxntp);
  return this;
}
/**
 * Gets how many tokens are parsed at most in each example doc field that
 * is not stored with TermVector support.
 *
 * @return the configured maximum number of tokens to parse, or 5000 if none is set
 */
public int getMoreLikeThisMaxTokensParsed() {
  final int fallback = 5000;
  return getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, fallback);
}
/**
 * Sets whether the query is boosted by the interesting term relevance.
 *
 * @param b {@code true} to boost the query with the interesting term relevance
 * @return this
 */
public SolrQuery setMoreLikeThisBoost(boolean b) {
  final String param = MoreLikeThisParams.BOOST;
  set(param, b);
  return this;
}
/**
 * Gets whether the query is boosted by the interesting term relevance.
 *
 * @return the configured boost flag, or {@code false} if none is set
 */
public boolean getMoreLikeThisBoost() {
  final boolean fallback = false;
  return getBool(MoreLikeThisParams.BOOST, fallback);
}
/**
 * Sets the query fields and their boosts, written in the same format as
 * the one used in DisMaxQParserPlugin. The fields named here must also be
 * added using {@link #addMoreLikeThisField(String)}.
 *
 * @param qf the query fields
 * @return this
 */
public SolrQuery setMoreLikeThisQF(String qf) {
  final String param = MoreLikeThisParams.QF;
  set(param, qf);
  return this;
}
/**
 * Gets the query fields and their boosts.
 *
 * @return the value stored under {@link MoreLikeThisParams#QF}
 */
public String getMoreLikeThisQF() {
  final String queryFields = get(MoreLikeThisParams.QF);
  return queryFields;
}
/**
 * Sets how many similar documents are returned for each result.
 *
 * @param count the number of similar documents to return for each result
 * @return this
 */
public SolrQuery setMoreLikeThisCount(int count) {
  final String param = MoreLikeThisParams.DOC_COUNT;
  set(param, count);
  return this;
}
/**
 * Gets how many similar documents are returned for each result.
 *
 * @return the configured count, or
 *         {@link MoreLikeThisParams#DEFAULT_DOC_COUNT} if none is set
 */
public int getMoreLikeThisCount() {
  final int fallback = MoreLikeThisParams.DEFAULT_DOC_COUNT;
  return getInt(MoreLikeThisParams.DOC_COUNT, fallback);
}
/**
 * Enable/Disable MoreLikeThis. After enabling MoreLikeThis, the fields
 * used for computing similarity must be specified calling
 * {@link #addMoreLikeThisField(String)}.
 *
 * @param b flag to indicate if MoreLikeThis should be enabled. If
 *          {@code false}, the MoreLikeThis flag is removed together with
 *          the similarity fields, the min/max term, doc and word settings,
 *          the max query terms, the max tokens parsed, the boost flag, the
 *          query fields and the doc count
 * @return this
 */
public SolrQuery setMoreLikeThis(boolean b) {
  if (b) {
    set(MoreLikeThisParams.MLT, true);
    return this;
  }

  // disabling: drop the flag itself and every setting this class can write
  final String[] mltParams = {
      MoreLikeThisParams.MLT,
      MoreLikeThisParams.SIMILARITY_FIELDS,
      MoreLikeThisParams.MIN_TERM_FREQ,
      MoreLikeThisParams.MIN_DOC_FREQ,
      MoreLikeThisParams.MIN_WORD_LEN,
      MoreLikeThisParams.MAX_WORD_LEN,
      MoreLikeThisParams.MAX_QUERY_TERMS,
      MoreLikeThisParams.MAX_NUM_TOKENS_PARSED,
      MoreLikeThisParams.BOOST,
      MoreLikeThisParams.QF,
      MoreLikeThisParams.DOC_COUNT
  };
  for (String name : mltParams) {
    remove(name);
  }
  return this;
}
/**
 * Tells whether MoreLikeThis is switched on for this query.
 *
 * @return true if MoreLikeThis is enabled, false otherwise (including
 *         when the flag is not set at all)
 */
public boolean getMoreLikeThis() {
  final boolean enabled = getBool(MoreLikeThisParams.MLT, false);
  return enabled;
}
public SolrQuery setFields(String ... fields) {
if( fields == null || fields.length == 0 ) {
this.remove( CommonParams.FL );

View File

@ -51,6 +51,7 @@ public class QueryResponse extends SolrResponseBase
private Map<String,NamedList<Object>> _suggestInfo = null;
private NamedList<Object> _statsInfo = null;
private NamedList<NamedList<Number>> _termsInfo = null;
private NamedList<SolrDocumentList> _moreLikeThisInfo = null;
private String _cursorMarkNext = null;
// Grouping response
@ -168,6 +169,9 @@ public class QueryResponse extends SolrResponseBase
_termsInfo = (NamedList<NamedList<Number>>) res.getVal( i );
extractTermsInfo( _termsInfo );
}
else if ( "moreLikeThis".equals( n ) ) {
_moreLikeThisInfo = (NamedList<SolrDocumentList>) res.getVal( i );
}
else if ( CursorMarkParams.CURSOR_MARK_NEXT.equals( n ) ) {
_cursorMarkNext = (String) res.getVal( i );
}
@ -547,6 +551,10 @@ public class QueryResponse extends SolrResponseBase
public TermsResponse getTermsResponse() {
return _termsResponse;
}
/**
 * Gets the "moreLikeThis" section of the response.
 *
 * @return the MoreLikeThis results read from the response, or
 *         {@code null} if the response had no "moreLikeThis" section
 */
public NamedList<SolrDocumentList> getMoreLikeThis() {
  return this._moreLikeThisInfo;
}
/**
* See also: {@link #getLimitingFacets()}

View File

@ -50,6 +50,9 @@ public interface MoreLikeThisParams
// Do you want to include the original document in the results or not
public final static String INTERESTING_TERMS = PREFIX + "interestingTerms"; // false,details,(list or true)
// the default doc count
public final static int DEFAULT_DOC_COUNT = 5;
public enum TermStyle {
NONE,

View File

@ -1996,37 +1996,38 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
// test with mlt.fl having comma separated values
SolrQuery q = new SolrQuery("*:*");
q.setRows(20);
q.setParam("mlt", "true");
q.setParam("mlt.mintf", "0");
q.setParam("mlt.count", "2");
q.setParam("mlt.fl", "x_s,y_s,z_s");
q.setMoreLikeThisFields("x_s", "y_s", "z_s");
q.setMoreLikeThisMinTermFreq(0);
q.setMoreLikeThisCount(2);
QueryResponse response = client.query(q);
assertEquals(20, response.getResults().getNumFound());
NamedList<Object> moreLikeThis = (NamedList<Object>) response.getResponse().get("moreLikeThis");
NamedList<SolrDocumentList> moreLikeThis = response.getMoreLikeThis();
assertNotNull("MoreLikeThis response should not have been null", moreLikeThis);
for (int i=0; i<20; i++) {
String id = "testMoreLikeThis" + i;
SolrDocumentList mltResp = (SolrDocumentList) moreLikeThis.get(id);
SolrDocumentList mltResp = moreLikeThis.get(id);
assertNotNull("MoreLikeThis response for id=" + id + " should not be null", mltResp);
assertTrue("MoreLikeThis response for id=" + id + " had numFound=0", mltResp.getNumFound() > 0);
assertTrue("MoreLikeThis response for id=" + id + " had not returned exactly 2 documents", mltResp.size() == 2);
}
// now test with multiple mlt.fl parameters
q = new SolrQuery("*:*");
q.setRows(20);
q.setParam("mlt", "true");
q.setParam("mlt.mintf", "0");
q.setParam("mlt.count", "2");
q.setParam("mlt.fl", "x_s", "y_s", "z_s");
q.setMoreLikeThisMinTermFreq(0);
q.setMoreLikeThisCount(2);
response = client.query(q);
assertEquals(20, response.getResults().getNumFound());
moreLikeThis = (NamedList<Object>) response.getResponse().get("moreLikeThis");
moreLikeThis = response.getMoreLikeThis();
assertNotNull("MoreLikeThis response should not have been null", moreLikeThis);
for (int i=0; i<20; i++) {
String id = "testMoreLikeThis" + i;
SolrDocumentList mltResp = (SolrDocumentList) moreLikeThis.get(id);
SolrDocumentList mltResp = moreLikeThis.get(id);
assertNotNull("MoreLikeThis response for id=" + id + " should not be null", mltResp);
assertTrue("MoreLikeThis response for id=" + id + " had numFound=0", mltResp.getNumFound() > 0);
assertTrue("MoreLikeThis response for id=" + id + " had not returned exactly 2 documents", mltResp.size() == 2);
}
}

View File

@ -431,4 +431,29 @@ public class SolrQueryTest extends LuceneTestCase {
assertNull(solrQuery.getParams("f.field3.facet.interval.set"));
}
/**
 * Exercises the MoreLikeThis accessors of SolrQuery: adding fields turns
 * MoreLikeThis on, clearing the fields turns it off, and every setter is
 * read back through its matching getter.
 */
public void testMoreLikeThis() {
  final SolrQuery query = new SolrQuery();

  // the first added field must enable MoreLikeThis on its own
  query.addMoreLikeThisField("mlt1");
  assertTrue(query.getMoreLikeThis());

  for (String extraField : new String[] {"mlt2", "mlt3", "mlt4"}) {
    query.addMoreLikeThisField(extraField);
  }
  assertEquals(4, query.getMoreLikeThisFields().length);

  // clearing the fields must also disable MoreLikeThis
  query.setMoreLikeThisFields((String[]) null);
  assertNull(query.getMoreLikeThisFields());
  assertFalse(query.getMoreLikeThis());

  // each setter returns the query, so the value can be read back in one chain
  assertTrue(query.setMoreLikeThisBoost(true).getMoreLikeThisBoost());
  assertEquals("qf", query.setMoreLikeThisQF("qf").getMoreLikeThisQF());
  assertEquals(10, query.setMoreLikeThisMaxTokensParsed(10).getMoreLikeThisMaxTokensParsed());
  assertEquals(11, query.setMoreLikeThisMinTermFreq(11).getMoreLikeThisMinTermFreq());
  assertEquals(12, query.setMoreLikeThisMinDocFreq(12).getMoreLikeThisMinDocFreq());
  assertEquals(13, query.setMoreLikeThisMaxWordLen(13).getMoreLikeThisMaxWordLen());
  assertEquals(14, query.setMoreLikeThisMinWordLen(14).getMoreLikeThisMinWordLen());
  assertEquals(15, query.setMoreLikeThisMaxQueryTerms(15).getMoreLikeThisMaxQueryTerms());
  assertEquals(16, query.setMoreLikeThisCount(16).getMoreLikeThisCount());
}
}