SOLR-12304: MLT component now supports mlt.interestingTerms

This commit is contained in:
Alessandro Benedetti 2019-05-16 23:11:43 -04:00 committed by David Smiley
parent 9d7c1923e4
commit b9db118ed3
5 changed files with 335 additions and 15 deletions

View File

@ -90,6 +90,9 @@ New Features
* SOLR-13468: autoscaling/suggestions should be able to give suggestions from config sent as a payload (noble)
* SOLR-12304: The MoreLikeThisComponent now supports the mlt.interestingTerms parameter. Previously this option was
unique to the MLT handler. (Alessandro Benedetti via David Smiley)
Other Changes
----------------------

View File

@ -398,7 +398,7 @@ public class MoreLikeThisHandler extends RequestHandlerBase
rawMLTQuery = mlt.like(id);
boostedMLTQuery = getBoostedQuery( rawMLTQuery );
if( terms != null ) {
fillInterestingTermsFromMLTQuery( rawMLTQuery, terms );
fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms );
}
// exclude current document from results

View File

@ -191,11 +191,6 @@ public class MoreLikeThisComponent extends SearchComponent {
log.info("MLT: results added for key: " + key + " documents: "
+ shardDocList.toString());
// if (log.isDebugEnabled()) {
// for (SolrDocument doc : shardDocList) {
// doc.addField("shard", "=" + r.getShard());
// }
// }
SolrDocumentList mergedDocList = tempResults.get(key);
if (mergedDocList == null) {
@ -370,21 +365,31 @@ public class MoreLikeThisComponent extends SearchComponent {
IndexSchema schema = searcher.getSchema();
MoreLikeThisHandler.MoreLikeThisHelper mltHelper = new MoreLikeThisHandler.MoreLikeThisHelper(
p, searcher);
NamedList<DocList> mlt = new SimpleOrderedMap<>();
NamedList<DocList> mltResponse = new SimpleOrderedMap<>();
DocIterator iterator = docs.iterator();
SimpleOrderedMap<Object> dbg = null;
if (rb.isDebug()) {
dbg = new SimpleOrderedMap<>();
}
SimpleOrderedMap<Object> interestingTermsResponse = null;
MoreLikeThisParams.TermStyle interestingTermsConfig = MoreLikeThisParams.TermStyle.get(p.get(MoreLikeThisParams.INTERESTING_TERMS));
List<MoreLikeThisHandler.InterestingTerm> interestingTerms = (interestingTermsConfig == MoreLikeThisParams.TermStyle.NONE)
? null : new ArrayList<>(mltHelper.getMoreLikeThis().getMaxQueryTerms());
if (interestingTerms != null) {
interestingTermsResponse = new SimpleOrderedMap<>();
}
while (iterator.hasNext()) {
int id = iterator.nextDoc();
int rows = p.getInt(MoreLikeThisParams.DOC_COUNT, 5);
DocListAndSet sim = mltHelper.getMoreLikeThis(id, 0, rows, null, null,
DocListAndSet similarDocuments = mltHelper.getMoreLikeThis(id, 0, rows, null, interestingTerms,
flags);
String name = schema.printableUniqueKey(searcher.doc(id));
mlt.add(name, sim.docList);
mltResponse.add(name, similarDocuments.docList);
if (dbg != null) {
SimpleOrderedMap<Object> docDbg = new SimpleOrderedMap<>();
@ -393,9 +398,9 @@ public class MoreLikeThisComponent extends SearchComponent {
.add("boostedMLTQuery", mltHelper.getBoostedMLTQuery().toString());
docDbg.add("realMLTQuery", mltHelper.getRealMLTQuery().toString());
SimpleOrderedMap<Object> explains = new SimpleOrderedMap<>();
DocIterator mltIte = sim.docList.iterator();
while (mltIte.hasNext()) {
int mltid = mltIte.nextDoc();
DocIterator similarDocumentsIterator = similarDocuments.docList.iterator();
while (similarDocumentsIterator.hasNext()) {
int mltid = similarDocumentsIterator.nextDoc();
String key = schema.printableUniqueKey(searcher.doc(mltid));
explains.add(key,
searcher.explain(mltHelper.getRealMLTQuery(), mltid));
@ -403,13 +408,32 @@ public class MoreLikeThisComponent extends SearchComponent {
docDbg.add("explain", explains);
dbg.add(name, docDbg);
}
if (interestingTermsResponse != null) {
if (interestingTermsConfig == MoreLikeThisParams.TermStyle.DETAILS) {
SimpleOrderedMap<Float> interestingTermsWithScore = new SimpleOrderedMap<>();
for (MoreLikeThisHandler.InterestingTerm interestingTerm : interestingTerms) {
interestingTermsWithScore.add(interestingTerm.term.toString(), interestingTerm.boost);
}
interestingTermsResponse.add(name, interestingTermsWithScore);
} else {
List<String> interestingTermsString = new ArrayList<>(interestingTerms.size());
for (MoreLikeThisHandler.InterestingTerm interestingTerm : interestingTerms) {
interestingTermsString.add(interestingTerm.term.toString());
}
interestingTermsResponse.add(name, interestingTermsString);
}
}
}
// add debug information
if (dbg != null) {
rb.addDebugInfo("moreLikeThis", dbg);
}
return mlt;
// add Interesting Terms
if (interestingTermsResponse != null) {
rb.rsp.add("interestingTerms", interestingTermsResponse);
}
return mltResponse;
}
// ///////////////////////////////////////////

View File

@ -0,0 +1,286 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Tests for {@link MoreLikeThisComponent}.
 *
 * <p>Each test sends a request with {@code mlt=true} for one of the documents indexed in
 * {@link #moreLikeThisBeforeClass()} and checks the XML response with XPath expressions:
 * the ordering of the returned documents, the {@code interestingTerms} section (for both the
 * {@code details} and {@code list} styles) and the {@code moreLikeThis} debug section.
 *
 * @see MoreLikeThisComponent
 */
@Slow
public class MoreLikeThisComponentTest extends SolrTestCaseJ4 {

  /** Starts the test core and indexes the five documents every test in this class queries. */
  @BeforeClass
  public static void moreLikeThisBeforeClass() throws Exception {
    initCore("solrconfig.xml", "schema.xml");

    assertU(adoc(
        "id", "42",
        "name", "Tom Cruise",
        "subword", "Top Gun",
        "subword", "Risky Business",
        "subword", "The Color of Money",
        "subword", "Minority Report",
        "subword", "Days of Thunder",
        "subword", "Eyes Wide Shut",
        "subword", "Far and Away",
        "foo_ti", "10"));
    assertU(adoc(
        "id", "43",
        "name", "Tom Hanks",
        "subword", "The Green Mile",
        "subword", "Forest Gump",
        "subword", "Philadelphia Story",
        "subword", "Big",
        "subword", "Cast Away",
        "foo_ti", "10"));
    assertU(adoc(
        "id", "44",
        "name", "Harrison Ford",
        "subword", "Star Wars",
        "subword", "Indiana Jones",
        "subword", "Patriot Games",
        "subword", "Regarding Henry"));
    assertU(adoc(
        "id", "45",
        "name", "George Harrison",
        "subword", "Yellow Submarine",
        "subword", "Help",
        "subword", "Magical Mystery Tour",
        "subword", "Sgt. Peppers Lonley Hearts Club Band"));
    assertU(adoc(
        "id", "46",
        "name", "Nicole Kidman",
        "subword", "Batman",
        "subword", "Days of Thunder",
        "subword", "Eyes Wide Shut",
        "subword", "Far and Away"));

    assertU(commit());
  }

  /**
   * Creates a fresh parameter set holding the MoreLikeThis settings shared by every test:
   * MLT switched on, similarity computed on {@code name} and {@code subword}, minimum term
   * and document frequency of 1, and indented output.
   */
  private ModifiableSolrParams newCommonMoreLikeThisParams() {
    final ModifiableSolrParams common = new ModifiableSolrParams();
    common.set(MoreLikeThisParams.MLT, "true");
    common.set(MoreLikeThisParams.SIMILARITY_FIELDS, "name,subword");
    common.set(MoreLikeThisParams.MIN_TERM_FREQ, "1");
    common.set(MoreLikeThisParams.MIN_DOC_FREQ, "1");
    common.set("indent", "true");
    return common;
  }

  /**
   * Builds a local request for the given parameters against the harness core, checks the
   * response against the XPath expressions using the supplied assertion message, and then
   * closes the request.
   */
  private void assertMoreLikeThis(String message, ModifiableSolrParams params, String... xpaths) {
    final SolrCore core = h.getCore();
    final SolrQueryRequest request = new LocalSolrQueryRequest(core, params);
    assertQ(message, request, xpaths);
    request.close();
  }

  /**
   * Same as {@link #assertMoreLikeThis(String, ModifiableSolrParams, String...)} but uses the
   * message-less {@code assertQ} variant.
   */
  private void assertMoreLikeThis(ModifiableSolrParams params, String... xpaths) {
    final SolrCore core = h.getCore();
    final SolrQueryRequest request = new LocalSolrQueryRequest(core, params);
    assertQ(request, xpaths);
    request.close();
  }

  @Test
  public void testMLT_baseParams_shouldReturnSimilarDocuments() {
    final ModifiableSolrParams mltParams = newCommonMoreLikeThisParams();

    mltParams.set(CommonParams.Q, "id:42");
    assertMoreLikeThis("morelikethis - tom cruise", mltParams,
        "//result/doc[1]/str[@name='id'][.='46']",
        "//result/doc[2]/str[@name='id'][.='43']");

    mltParams.set(CommonParams.Q, "id:44");
    assertMoreLikeThis("morelike this - harrison ford", mltParams,
        "//result/doc[1]/str[@name='id'][.='45']");
  }

  @Test
  public void testMLT_baseParamsInterestingTermsDetails_shouldReturnSimilarDocumentsAndInterestingTermsDetails() {
    final ModifiableSolrParams mltParams = newCommonMoreLikeThisParams();
    mltParams.set(MoreLikeThisParams.INTERESTING_TERMS, "details");
    mltParams.set(CommonParams.Q, "id:42");

    assertMoreLikeThis("morelikethis - tom cruise", mltParams,
        "//result/doc[1]/str[@name='id'][.='46']",
        "//result/doc[2]/str[@name='id'][.='43']",
        "//lst[@name='interestingTerms']/lst[1][count(*)>0]",
        "//lst[@name='interestingTerms']/lst[1]/float[.=1.0]");
  }

  @Test
  public void testMLT_baseParamsInterestingTermsList_shouldReturnSimilarDocumentsAndInterestingTermsList() {
    final ModifiableSolrParams mltParams = newCommonMoreLikeThisParams();
    mltParams.set(MoreLikeThisParams.INTERESTING_TERMS, "list");
    mltParams.set(CommonParams.Q, "id:42");

    assertMoreLikeThis("morelikethis - tom cruise", mltParams,
        "//result/doc[1]/str[@name='id'][.='46']",
        "//result/doc[2]/str[@name='id'][.='43']",
        "//lst[@name='interestingTerms']/arr[@name='42'][count(*)>0]",
        "//lst[@name='interestingTerms']/arr[@name='42']/str[.='name:Cruise']");
  }

  @Test
  public void testMLT_boostEnabled_shouldReturnSimilarDocumentsConsideringBoost() {
    final ModifiableSolrParams mltParams = newCommonMoreLikeThisParams();
    mltParams.set(MoreLikeThisParams.BOOST, "true");
    mltParams.set(CommonParams.Q, "id:42");

    assertMoreLikeThis("morelikethis - tom cruise", mltParams,
        "//result/doc[1]/str[@name='id'][.='46']",
        "//result/doc[2]/str[@name='id'][.='43']");

    // Second pass: same source document, now with per-field query weights.
    mltParams.set(CommonParams.Q, "id:42");
    mltParams.set(MoreLikeThisParams.QF, "name^5.0 subword^0.1");
    assertMoreLikeThis("morelikethis with weights", mltParams,
        "//result/doc[1]/str[@name='id'][.='43']",
        "//result/doc[2]/str[@name='id'][.='46']");
  }

  @Test
  public void testMLT_boostEnabledInterestingTermsDetails_shouldReturnSimilarDocumentsConsideringBoostAndInterestingTermsDetails() {
    final ModifiableSolrParams mltParams = newCommonMoreLikeThisParams();
    mltParams.set(MoreLikeThisParams.BOOST, "true");
    mltParams.set(MoreLikeThisParams.INTERESTING_TERMS, "details");
    mltParams.set(CommonParams.Q, "id:42");

    assertMoreLikeThis("morelikethis - tom cruise", mltParams,
        "//result/doc[1]/str[@name='id'][.='46']",
        "//result/doc[2]/str[@name='id'][.='43']",
        "//lst[@name='interestingTerms']/lst[1][count(*)>0]",
        "//lst[@name='interestingTerms']/lst[1]/float[.>1.0]");

    // Second pass: add per-field query weights.
    mltParams.set(MoreLikeThisParams.QF, "name^5.0 subword^0.1");
    assertMoreLikeThis("morelikethis with weights", mltParams,
        "//result/doc[1]/str[@name='id'][.='43']",
        "//result/doc[2]/str[@name='id'][.='46']",
        "//lst[@name='interestingTerms']/lst[1][count(*)>0]",
        "//lst[@name='interestingTerms']/lst[1]/float[.>5.0]");
  }

  @Test
  public void testMLT_boostEnabledInterestingTermsList_shouldReturnSimilarDocumentsConsideringBoostAndInterestingTermsList() {
    final ModifiableSolrParams mltParams = newCommonMoreLikeThisParams();
    mltParams.set(MoreLikeThisParams.BOOST, "true");
    mltParams.set(MoreLikeThisParams.INTERESTING_TERMS, "list");
    mltParams.set(CommonParams.Q, "id:42");

    assertMoreLikeThis("morelikethis - tom cruise", mltParams,
        "//result/doc[1]/str[@name='id'][.='46']",
        "//result/doc[2]/str[@name='id'][.='43']",
        "//lst[@name='interestingTerms']/arr[@name='42'][count(*)>0]",
        "//lst[@name='interestingTerms']/arr[@name='42']/str[.='name:Cruise']");

    // Second pass: add per-field query weights.
    mltParams.set(MoreLikeThisParams.QF, "name^5.0 subword^0.1");
    assertMoreLikeThis("morelikethis with weights", mltParams,
        "//result/doc[1]/str[@name='id'][.='43']",
        "//result/doc[2]/str[@name='id'][.='46']",
        "//lst[@name='interestingTerms']/arr[@name='42'][count(*)>0]",
        "//lst[@name='interestingTerms']/arr[@name='42']/str[.='name:Cruise']");
  }

  @Test
  public void testMLT_debugEnabled_shouldReturnSimilarDocumentsWithDebug() {
    final ModifiableSolrParams mltParams = newCommonMoreLikeThisParams();
    mltParams.set(MoreLikeThisParams.BOOST, "true");
    mltParams.set(CommonParams.Q, "id:44");
    mltParams.set(CommonParams.DEBUG_QUERY, "true");

    assertMoreLikeThis("morelike this - harrison ford", mltParams,
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='rawMLTQuery']",
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='boostedMLTQuery']",
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='realMLTQuery']",
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/lst[@name='explain']/str[@name='45']");

    // Second pass: debug switched off, source document selected through the field query parser.
    mltParams.remove(CommonParams.DEBUG_QUERY);
    mltParams.set(CommonParams.Q, "{!field f=id}44");
    assertMoreLikeThis(mltParams,
        "//result/doc[1]/str[@name='id'][.='45']");
  }

  @Test
  public void testMLT_debugEnabledInterestingTermsDetails_shouldReturnSimilarDocumentsWithDebugAndInterestingTermsDetails() {
    final ModifiableSolrParams mltParams = newCommonMoreLikeThisParams();
    mltParams.set(MoreLikeThisParams.BOOST, "true");
    mltParams.set(MoreLikeThisParams.INTERESTING_TERMS, "details");
    mltParams.set(CommonParams.Q, "id:44");
    mltParams.set(CommonParams.DEBUG_QUERY, "true");

    assertMoreLikeThis("morelike this - harrison ford", mltParams,
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='rawMLTQuery']",
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='boostedMLTQuery']",
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='realMLTQuery']",
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/lst[@name='explain']/str[@name='45']",
        "//lst[@name='interestingTerms']/lst[1][count(*)>0]",
        "//lst[@name='interestingTerms']/lst[1]/float[.>1.0]");

    // Second pass: debug switched off, source document selected through the field query parser.
    mltParams.remove(CommonParams.DEBUG_QUERY);
    mltParams.set(CommonParams.Q, "{!field f=id}44");
    assertMoreLikeThis(mltParams,
        "//result/doc[1]/str[@name='id'][.='45']",
        "//lst[@name='interestingTerms']/lst[1][count(*)>0]",
        "//lst[@name='interestingTerms']/lst[1]/float[.>1.0]");
  }

  @Test
  public void testMLT_debugEnabledInterestingTermsList_shouldReturnSimilarDocumentsWithDebugAndInterestingTermsList() {
    final ModifiableSolrParams mltParams = newCommonMoreLikeThisParams();
    mltParams.set(MoreLikeThisParams.BOOST, "true");
    mltParams.set(MoreLikeThisParams.INTERESTING_TERMS, "list");
    mltParams.set(CommonParams.Q, "id:44");
    mltParams.set(CommonParams.DEBUG_QUERY, "true");

    assertMoreLikeThis("morelike this - harrison ford", mltParams,
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='rawMLTQuery']",
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='boostedMLTQuery']",
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/str[@name='realMLTQuery']",
        "//lst[@name='debug']/lst[@name='moreLikeThis']/lst[@name='44']/lst[@name='explain']/str[@name='45']",
        "//lst[@name='interestingTerms']/arr[@name='44'][count(*)>0]",
        "//lst[@name='interestingTerms']/arr[@name='44']/str[.='name:Harrison']");

    // Second pass: debug switched off, source document selected through the field query parser.
    mltParams.remove(CommonParams.DEBUG_QUERY);
    mltParams.set(CommonParams.Q, "{!field f=id}44");
    assertMoreLikeThis(mltParams,
        "//result/doc[1]/str[@name='id'][.='45']",
        "//lst[@name='interestingTerms']/arr[@name='44'][count(*)>0]",
        "//lst[@name='interestingTerms']/arr[@name='44']/str[.='name:Harrison']");
  }
}

View File

@ -86,6 +86,8 @@ If set to `true`, activates the `MoreLikeThis` component and enables Solr to ret
`mlt.count`::
Specifies the number of similar documents to be returned for each result. The default value is 5.
`mlt.interestingTerms`:: _Same as defined below for the MLT Handler._
== Parameters for the MoreLikeThisHandler
The table below summarizes parameters accessible through the `MoreLikeThisHandler`. It supports faceting, paging, and filtering using common query parameters, but does not work well with alternate query parsers.
@ -97,7 +99,12 @@ Specifies whether or not the response should include the matched document. If se
Specifies an offset into the main query search results to locate the document on which the `MoreLikeThis` query should operate. By default, the query operates on the first result for the q parameter.
`mlt.interestingTerms`::
Controls how the `MoreLikeThis` component presents the "interesting" terms (the top TF/IDF terms) for the query. Supports three settings. The setting list lists the terms. The setting none lists no terms. The setting details lists the terms along with the boost value used for each term. Unless `mlt.boost=true`, all terms will have `boost=1.0`.
Controls how the `MoreLikeThis` component presents the "interesting" terms (the top TF/IDF terms) for the query.
It supports three settings:
The setting `list` lists the terms.
The setting `none` lists no terms.
The setting `details` lists the terms along with the boost value used for each term.
Unless `mlt.boost=true`, all terms will have `boost=1.0`.
== MoreLikeThis Query Parser