mirror of https://github.com/apache/lucene.git
SOLR-7912: Add boost support, and also exclude the queried document in MoreLikeThis QParser
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1715931 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8eea72d476
commit
064f9e9970
|
@ -245,6 +245,9 @@ New Features
|
|||
* SOLR-8329: SchemaSimilarityFactory now supports a 'defaultSimFromFieldType' init option for using
|
||||
a fieldType name to identify which Similarity to use as a default. (hossman)
|
||||
|
||||
* SOLR-7912: Add boost support, and also exclude the queried document in MoreLikeThis QParser
|
||||
(Jens Wille via Anshum Gupta)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -16,8 +16,15 @@ package org.apache.solr.search.mlt;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.mlt.MoreLikeThis;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.StringUtils;
|
||||
|
@ -31,6 +38,7 @@ import org.apache.solr.response.SolrQueryResponse;
|
|||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.QueryParsing;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -62,6 +70,8 @@ public class CloudMLTQParser extends QParser {
|
|||
"document with id [" + id + "]");
|
||||
}
|
||||
|
||||
String[] qf = localParams.getParams("qf");
|
||||
Map<String,Float> boostFields = new HashMap<>();
|
||||
MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
|
||||
|
||||
if(localParams.getInt("mintf") != null)
|
||||
|
@ -85,11 +95,14 @@ public class CloudMLTQParser extends QParser {
|
|||
mlt.setMaxDocFreq(localParams.getInt("maxdf"));
|
||||
}
|
||||
|
||||
if(localParams.get("boost") != null) {
|
||||
mlt.setBoost(localParams.getBool("boost"));
|
||||
boostFields = SolrPluginUtils.parseFieldBoosts(qf);
|
||||
}
|
||||
|
||||
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
|
||||
|
||||
String[] qf = localParams.getParams("qf");
|
||||
Map<String, Collection<Object>> filteredDocument = new HashMap();
|
||||
|
||||
ArrayList<String> fieldNames = new ArrayList();
|
||||
|
||||
if (qf != null) {
|
||||
|
@ -127,7 +140,35 @@ public class CloudMLTQParser extends QParser {
|
|||
}
|
||||
|
||||
try {
|
||||
return mlt.like(filteredDocument);
|
||||
Query rawMLTQuery = mlt.like(filteredDocument);
|
||||
BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
|
||||
|
||||
if (boostFields.size() > 0) {
|
||||
BooleanQuery.Builder newQ = new BooleanQuery.Builder();
|
||||
newQ.setDisableCoord(boostedMLTQuery.isCoordDisabled());
|
||||
newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
|
||||
|
||||
for (BooleanClause clause : boostedMLTQuery) {
|
||||
Query q = clause.getQuery();
|
||||
Float b = boostFields.get(((TermQuery) q).getTerm().field());
|
||||
|
||||
if (b != null) {
|
||||
q = new BoostQuery(q, b);
|
||||
}
|
||||
|
||||
newQ.add(q, clause.getOccur());
|
||||
}
|
||||
|
||||
boostedMLTQuery = newQ.build();
|
||||
}
|
||||
|
||||
// exclude current document from results
|
||||
BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
|
||||
realMLTQuery.setDisableCoord(true);
|
||||
realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
|
||||
realMLTQuery.add(createIdQuery("id", id), BooleanClause.Occur.MUST_NOT);
|
||||
|
||||
return realMLTQuery.build();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request");
|
||||
|
@ -150,4 +191,17 @@ public class CloudMLTQParser extends QParser {
|
|||
return (SolrDocument) response.get("doc");
|
||||
}
|
||||
|
||||
private Query createIdQuery(String defaultField, String uniqueValue) {
|
||||
return new TermQuery(req.getSchema().getField(defaultField).getType().getNumericType() != null
|
||||
? createNumericTerm(defaultField, uniqueValue)
|
||||
: new Term(defaultField, uniqueValue));
|
||||
}
|
||||
|
||||
private Term createNumericTerm(String field, String uniqueValue) {
|
||||
BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
|
||||
bytesRefBuilder.grow(NumericUtils.BUF_SIZE_INT);
|
||||
NumericUtils.intToPrefixCoded(Integer.parseInt(uniqueValue), 0, bytesRefBuilder);
|
||||
return new Term(field, bytesRefBuilder.toBytesRef());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -18,6 +18,9 @@ package org.apache.solr.search.mlt;
|
|||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.mlt.MoreLikeThis;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
@ -32,9 +35,11 @@ import org.apache.solr.schema.SchemaField;
|
|||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.QueryParsing;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -55,6 +60,7 @@ public class SimpleMLTQParser extends QParser {
|
|||
|
||||
SolrIndexSearcher searcher = req.getSearcher();
|
||||
Query docIdQuery = createIdQuery(defaultField, uniqueValue);
|
||||
Map<String,Float> boostFields = new HashMap<>();
|
||||
|
||||
try {
|
||||
TopDocs td = searcher.search(docIdQuery, 1);
|
||||
|
@ -85,6 +91,11 @@ public class SimpleMLTQParser extends QParser {
|
|||
if(localParams.get("maxdf") != null) {
|
||||
mlt.setMaxDocFreq(localParams.getInt("maxdf"));
|
||||
}
|
||||
|
||||
if(localParams.get("boost") != null) {
|
||||
mlt.setBoost(localParams.getBool("boost"));
|
||||
boostFields = SolrPluginUtils.parseFieldBoosts(qf);
|
||||
}
|
||||
|
||||
ArrayList<String> fields = new ArrayList();
|
||||
|
||||
|
@ -115,8 +126,35 @@ public class SimpleMLTQParser extends QParser {
|
|||
mlt.setFieldNames(fields.toArray(new String[fields.size()]));
|
||||
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
|
||||
|
||||
return mlt.like(scoreDocs[0].doc);
|
||||
Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
|
||||
BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
|
||||
|
||||
if (boostFields.size() > 0) {
|
||||
BooleanQuery.Builder newQ = new BooleanQuery.Builder();
|
||||
newQ.setDisableCoord(boostedMLTQuery.isCoordDisabled());
|
||||
newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
|
||||
|
||||
for (BooleanClause clause : boostedMLTQuery) {
|
||||
Query q = clause.getQuery();
|
||||
Float b = boostFields.get(((TermQuery) q).getTerm().field());
|
||||
|
||||
if (b != null) {
|
||||
q = new BoostQuery(q, b);
|
||||
}
|
||||
|
||||
newQ.add(q, clause.getOccur());
|
||||
}
|
||||
|
||||
boostedMLTQuery = newQ.build();
|
||||
}
|
||||
|
||||
// exclude current document from results
|
||||
BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
|
||||
realMLTQuery.setDisableCoord(true);
|
||||
realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
|
||||
realMLTQuery.add(docIdQuery, BooleanClause.Occur.MUST_NOT);
|
||||
|
||||
return realMLTQuery.build();
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Error completing MLT request" + e.getMessage());
|
||||
|
|
|
@ -109,7 +109,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17");
|
||||
QueryResponse queryResponse = cloudClient.query(params);
|
||||
SolrDocumentList solrDocuments = queryResponse.getResults();
|
||||
int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 32};
|
||||
int[] expectedIds = new int[]{7, 13, 14, 15, 16, 20, 22, 24, 32, 9};
|
||||
int[] actualIds = new int[10];
|
||||
int i = 0;
|
||||
for (SolrDocument solrDocument : solrDocuments) {
|
||||
|
@ -122,7 +122,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
params.set(CommonParams.DEBUG, "true");
|
||||
queryResponse = queryServer(params);
|
||||
solrDocuments = queryResponse.getResults();
|
||||
expectedIds = new int[]{3, 29, 27, 26, 28};
|
||||
expectedIds = new int[]{29, 27, 26, 28};
|
||||
actualIds = new int[solrDocuments.size()];
|
||||
i = 0;
|
||||
for (SolrDocument solrDocument : solrDocuments) {
|
||||
|
@ -130,30 +130,27 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
}
|
||||
assertArrayEquals(expectedIds, actualIds);
|
||||
|
||||
String expectedQueryString = "lowerfilt:bmw lowerfilt:usa";
|
||||
String[] expectedQueryStrings = new String[]{
|
||||
"(+(lowerfilt:bmw lowerfilt:usa) -id:3)/no_coord",
|
||||
"(+(lowerfilt:usa lowerfilt:bmw) -id:3)/no_coord"};
|
||||
|
||||
ArrayList<String> actualParsedQueries;
|
||||
String[] actualParsedQueries;
|
||||
|
||||
if(queryResponse.getDebugMap().get("parsedquery") instanceof String) {
|
||||
actualParsedQueries = new ArrayList();
|
||||
actualParsedQueries.add((String) queryResponse.getDebugMap().get("parsedquery"));
|
||||
actualParsedQueries = new String[]{(String) queryResponse.getDebugMap().get("parsedquery")};
|
||||
} else {
|
||||
actualParsedQueries = (ArrayList<String>) queryResponse
|
||||
.getDebugMap().get("parsedquery");
|
||||
}
|
||||
|
||||
for (int counter = 0; counter < actualParsedQueries.size(); counter++) {
|
||||
assertTrue("Parsed queries aren't equal",
|
||||
compareParsedQueryStrings(expectedQueryString,
|
||||
actualParsedQueries.get(counter)));
|
||||
actualParsedQueries = ((ArrayList<String>) queryResponse
|
||||
.getDebugMap().get("parsedquery")).toArray(new String[0]);
|
||||
Arrays.sort(actualParsedQueries);
|
||||
}
|
||||
assertArrayEquals(expectedQueryStrings, actualParsedQueries);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1 mindf=0 mintf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
queryResponse = queryServer(params);
|
||||
solrDocuments = queryResponse.getResults();
|
||||
expectedIds = new int[]{26, 27, 3, 29, 28};
|
||||
expectedIds = new int[]{27, 3, 29, 28};
|
||||
actualIds = new int[solrDocuments.size()];
|
||||
i = 0;
|
||||
for (SolrDocument solrDocument : solrDocuments) {
|
||||
|
@ -162,21 +159,18 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
|
||||
assertArrayEquals(expectedIds, actualIds);
|
||||
|
||||
expectedQueryString = "lowerfilt:bmw lowerfilt:usa lowerfilt:328i";
|
||||
expectedQueryStrings = new String[]{
|
||||
"(+(lowerfilt:bmw lowerfilt:usa) -id:26)/no_coord",
|
||||
"(+(lowerfilt:usa lowerfilt:bmw lowerfilt:328i) -id:26)/no_coord"};
|
||||
|
||||
if(queryResponse.getDebugMap().get("parsedquery") instanceof String) {
|
||||
actualParsedQueries = new ArrayList();
|
||||
actualParsedQueries.add((String) queryResponse.getDebugMap().get("parsedquery"));
|
||||
actualParsedQueries = new String[]{(String) queryResponse.getDebugMap().get("parsedquery")};
|
||||
} else {
|
||||
actualParsedQueries = (ArrayList<String>) queryResponse
|
||||
.getDebugMap().get("parsedquery");
|
||||
}
|
||||
|
||||
for (int counter = 0; counter < actualParsedQueries.size(); counter++) {
|
||||
assertTrue("Parsed queries aren't equal",
|
||||
compareParsedQueryStrings(expectedQueryString,
|
||||
actualParsedQueries.get(counter)));
|
||||
actualParsedQueries = ((ArrayList<String>) queryResponse
|
||||
.getDebugMap().get("parsedquery")).toArray(new String[0]);
|
||||
Arrays.sort(actualParsedQueries);
|
||||
}
|
||||
assertArrayEquals(expectedQueryStrings, actualParsedQueries);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
// Test out a high value of df and make sure nothing matches.
|
||||
|
@ -200,7 +194,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
params.set(CommonParams.DEBUG, "true");
|
||||
queryResponse = queryServer(params);
|
||||
solrDocuments = queryResponse.getResults();
|
||||
assertEquals("Expected to match 4 documents with a minwl of 3 but found more", 5, solrDocuments.size());
|
||||
assertEquals("Expected to match 4 documents with a minwl of 3 but found more", 4, solrDocuments.size());
|
||||
|
||||
// Assert that {!mlt}id does not throw an exception i.e. implicitly, only fields that are stored + have explicit
|
||||
// analyzer are used for MLT Query construction.
|
||||
|
@ -210,7 +204,7 @@ public class CloudMLTQParserTest extends AbstractFullDistribZkTestBase {
|
|||
queryResponse = queryServer(params);
|
||||
solrDocuments = queryResponse.getResults();
|
||||
actualIds = new int[solrDocuments.size()];
|
||||
expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 32, 18, 19};
|
||||
expectedIds = new int[]{13, 14, 15, 16, 22, 24, 32, 18, 19, 21};
|
||||
i = 0;
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (SolrDocument solrDocument : solrDocuments) {
|
||||
|
|
|
@ -101,10 +101,9 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
|
|||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
assertQ(req(params),
|
||||
"//result/doc[1]/int[@name='id'][.='26']",
|
||||
"//result/doc[2]/int[@name='id'][.='29']",
|
||||
"//result/doc[3]/int[@name='id'][.='27']",
|
||||
"//result/doc[4]/int[@name='id'][.='28']"
|
||||
"//result/doc[1]/int[@name='id'][.='29']",
|
||||
"//result/doc[2]/int[@name='id'][.='27']",
|
||||
"//result/doc[3]/int[@name='id'][.='28']"
|
||||
);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
|
@ -118,14 +117,14 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
|
|||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1 mindf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
assertQ(req(params),
|
||||
"//result[@numFound='4']"
|
||||
"//result[@numFound='3']"
|
||||
);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=4 mintf=1 mindf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
assertQ(req(params),
|
||||
"//result[@numFound='1']"
|
||||
"//result[@numFound='0']"
|
||||
);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue