mirror of https://github.com/apache/lucene.git
SOLR-9644: Fixed SimpleMLTQParser and CloudMLTQParser to handle boosts properly and CloudMLTQParser to only extract actual values from IndexableField type fields to the filtered document.
This commit is contained in:
parent
b8383db06e
commit
2b4e3dd941
|
@ -345,6 +345,10 @@ Bug Fixes
|
|||
* SOLR-9883: Example schemaless solr config files can lead to invalid tlog replays: when updates are buffered,
|
||||
update processors ordered before DistributedUpdateProcessor, e.g. field normalization, are never run. (Steve Rowe)
|
||||
|
||||
* SOLR-9644: SimpleMLTQParser and CloudMLTQParser did not handle field boosts properly
|
||||
and CloudMLTQParser included extra strings from the field definitions in the query.
|
||||
(Ere Maijala via Anshum Gupta)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.legacy.LegacyNumericUtils;
|
||||
import org.apache.lucene.queries.mlt.MoreLikeThis;
|
||||
|
@ -64,73 +65,83 @@ public class CloudMLTQParser extends QParser {
|
|||
SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " +
|
||||
"document with id [" + id + "]");
|
||||
}
|
||||
|
||||
|
||||
String[] qf = localParams.getParams("qf");
|
||||
Map<String,Float> boostFields = new HashMap<>();
|
||||
MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
|
||||
|
||||
|
||||
mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
|
||||
|
||||
mlt.setMinDocFreq(localParams.getInt("mindf", 0));
|
||||
|
||||
mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
|
||||
|
||||
mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
|
||||
|
||||
mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
|
||||
|
||||
mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
|
||||
|
||||
mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
|
||||
|
||||
if(localParams.get("boost") != null) {
|
||||
mlt.setBoost(localParams.getBool("boost"));
|
||||
boostFields = SolrPluginUtils.parseFieldBoosts(qf);
|
||||
}
|
||||
Boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
|
||||
mlt.setBoost(boost);
|
||||
|
||||
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
|
||||
|
||||
Map<String, Collection<Object>> filteredDocument = new HashMap<>();
|
||||
ArrayList<String> fieldNames = new ArrayList<>();
|
||||
String[] fieldNames;
|
||||
|
||||
if (qf != null) {
|
||||
ArrayList<String> fields = new ArrayList();
|
||||
for (String fieldName : qf) {
|
||||
if (!StringUtils.isEmpty(fieldName)) {
|
||||
String[] strings = splitList.split(fieldName);
|
||||
for (String string : strings) {
|
||||
if (!StringUtils.isEmpty(string)) {
|
||||
fieldNames.add(string);
|
||||
fields.add(string);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Parse field names and boosts from the fields
|
||||
boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
|
||||
fieldNames = boostFields.keySet().toArray(new String[0]);
|
||||
} else {
|
||||
ArrayList<String> fields = new ArrayList();
|
||||
for (String field : doc.getFieldNames()) {
|
||||
// Only use fields that are stored and have an explicit analyzer.
|
||||
// This makes sense as the query uses tf/idf/.. for query construction.
|
||||
// We might want to relook and change this in the future though.
|
||||
SchemaField f = req.getSchema().getFieldOrNull(field);
|
||||
if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
|
||||
fieldNames.add(field);
|
||||
fields.add(field);
|
||||
}
|
||||
}
|
||||
fieldNames = fields.toArray(new String[0]);
|
||||
}
|
||||
|
||||
if( fieldNames.size() < 1 ) {
|
||||
if (fieldNames.length < 1) {
|
||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
|
||||
"MoreLikeThis requires at least one similarity field: qf" );
|
||||
}
|
||||
|
||||
mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
|
||||
mlt.setFieldNames(fieldNames);
|
||||
for (String field : fieldNames) {
|
||||
filteredDocument.put(field, doc.getFieldValues(field));
|
||||
Collection<Object> fieldValues = doc.getFieldValues(field);
|
||||
if (fieldValues != null) {
|
||||
Collection<Object> values = new ArrayList();
|
||||
for (Object val : fieldValues) {
|
||||
if (val instanceof IndexableField) {
|
||||
values.add(((IndexableField)val).stringValue());
|
||||
}
|
||||
else {
|
||||
values.add(val);
|
||||
}
|
||||
}
|
||||
filteredDocument.put(field, values);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
Query rawMLTQuery = mlt.like(filteredDocument);
|
||||
BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
|
||||
|
||||
if (boostFields.size() > 0) {
|
||||
if (boost && boostFields.size() > 0) {
|
||||
BooleanQuery.Builder newQ = new BooleanQuery.Builder();
|
||||
newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
|
||||
|
||||
|
|
|
@ -76,16 +76,13 @@ public class SimpleMLTQParser extends QParser {
|
|||
mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
|
||||
mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
|
||||
mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
|
||||
Boolean boost = localParams.getBool("boost", false);
|
||||
mlt.setBoost(boost);
|
||||
|
||||
// what happens if value is explicitly set to false?
|
||||
if(localParams.get("boost") != null) {
|
||||
mlt.setBoost(localParams.getBool("boost", false));
|
||||
boostFields = SolrPluginUtils.parseFieldBoosts(qf);
|
||||
}
|
||||
String[] fieldNames;
|
||||
|
||||
ArrayList<String> fields = new ArrayList<>();
|
||||
|
||||
if (qf != null) {
|
||||
ArrayList<String> fields = new ArrayList<>();
|
||||
for (String fieldName : qf) {
|
||||
if (!StringUtils.isEmpty(fieldName)) {
|
||||
String[] strings = splitList.split(fieldName);
|
||||
|
@ -96,26 +93,31 @@ public class SimpleMLTQParser extends QParser {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Parse field names and boosts from the fields
|
||||
boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
|
||||
fieldNames = boostFields.keySet().toArray(new String[0]);
|
||||
} else {
|
||||
Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields();
|
||||
for (String fieldName : fieldNames.keySet()) {
|
||||
if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored())
|
||||
if (fieldNames.get(fieldName).getType().getNumericType() == null)
|
||||
Map<String, SchemaField> fieldDefinitions = req.getSearcher().getSchema().getFields();
|
||||
ArrayList<String> fields = new ArrayList();
|
||||
for (String fieldName : fieldDefinitions.keySet()) {
|
||||
if (fieldDefinitions.get(fieldName).indexed() && fieldDefinitions.get(fieldName).stored())
|
||||
if (fieldDefinitions.get(fieldName).getType().getNumericType() == null)
|
||||
fields.add(fieldName);
|
||||
}
|
||||
fieldNames = fields.toArray(new String[0]);
|
||||
}
|
||||
if( fields.size() < 1 ) {
|
||||
if (fieldNames.length < 1) {
|
||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
|
||||
"MoreLikeThis requires at least one similarity field: qf" );
|
||||
}
|
||||
|
||||
mlt.setFieldNames(fields.toArray(new String[fields.size()]));
|
||||
mlt.setFieldNames(fieldNames);
|
||||
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
|
||||
|
||||
Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
|
||||
BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
|
||||
|
||||
if (boostFields.size() > 0) {
|
||||
if (boost && boostFields.size() > 0) {
|
||||
BooleanQuery.Builder newQ = new BooleanQuery.Builder();
|
||||
newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
|
||||
|
||||
|
|
|
@ -121,6 +121,27 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
|
|||
}
|
||||
assertArrayEquals(expectedIds, actualIds);
|
||||
|
||||
queryResponse = cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u^10,lowerfilt1_u^1000 boost=false mintf=0 mindf=0}30"));
|
||||
solrDocuments = queryResponse.getResults();
|
||||
expectedIds = new int[]{31, 18, 23, 13, 14, 20, 22, 32, 19, 21};
|
||||
actualIds = new int[solrDocuments.size()];
|
||||
i = 0;
|
||||
for (SolrDocument solrDocument : solrDocuments) {
|
||||
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
|
||||
}
|
||||
System.out.println("DEBUG ACTUAL IDS 1: " + Arrays.toString(actualIds));
|
||||
assertArrayEquals(expectedIds, actualIds);
|
||||
|
||||
queryResponse = cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u^10,lowerfilt1_u^1000 boost=true mintf=0 mindf=0}30"));
|
||||
solrDocuments = queryResponse.getResults();
|
||||
expectedIds = new int[]{29, 31, 32, 18, 23, 13, 14, 20, 22, 19};
|
||||
actualIds = new int[solrDocuments.size()];
|
||||
i = 0;
|
||||
for (SolrDocument solrDocument : solrDocuments) {
|
||||
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
|
||||
}
|
||||
System.out.println("DEBUG ACTUAL IDS 2: " + Arrays.toString(actualIds));
|
||||
assertArrayEquals(expectedIds, actualIds);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -220,7 +241,7 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
|
|||
}
|
||||
assertArrayEquals(expectedIds, actualIds);
|
||||
}
|
||||
|
||||
|
||||
public void testInvalidSourceDocument() throws IOException {
|
||||
SolrException e = expectThrows(SolrException.class, () -> {
|
||||
cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u}999999"));
|
||||
|
|
|
@ -107,9 +107,38 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
|
|||
"//result/doc[10]/int[@name='id'][.='23']"
|
||||
);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1^1000 boost=false mintf=0 mindf=0}30");
|
||||
assertQ(req(params),
|
||||
"//result/doc[1]/int[@name='id'][.='31']",
|
||||
"//result/doc[2]/int[@name='id'][.='13']",
|
||||
"//result/doc[3]/int[@name='id'][.='14']",
|
||||
"//result/doc[4]/int[@name='id'][.='18']",
|
||||
"//result/doc[5]/int[@name='id'][.='20']",
|
||||
"//result/doc[6]/int[@name='id'][.='22']",
|
||||
"//result/doc[7]/int[@name='id'][.='23']",
|
||||
"//result/doc[8]/int[@name='id'][.='32']",
|
||||
"//result/doc[9]/int[@name='id'][.='15']",
|
||||
"//result/doc[10]/int[@name='id'][.='16']"
|
||||
);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1^1000 boost=true mintf=0 mindf=0}30");
|
||||
assertQ(req(params),
|
||||
"//result/doc[1]/int[@name='id'][.='29']",
|
||||
"//result/doc[2]/int[@name='id'][.='31']",
|
||||
"//result/doc[3]/int[@name='id'][.='32']",
|
||||
"//result/doc[4]/int[@name='id'][.='13']",
|
||||
"//result/doc[5]/int[@name='id'][.='14']",
|
||||
"//result/doc[6]/int[@name='id'][.='18']",
|
||||
"//result/doc[7]/int[@name='id'][.='20']",
|
||||
"//result/doc[8]/int[@name='id'][.='22']",
|
||||
"//result/doc[9]/int[@name='id'][.='23']",
|
||||
"//result/doc[10]/int[@name='id'][.='15']"
|
||||
);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
assertQ(req(params),
|
||||
"//result/doc[1]/int[@name='id'][.='29']",
|
||||
"//result/doc[2]/int[@name='id'][.='27']",
|
||||
|
@ -118,14 +147,12 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
|
|||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=10 mintf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
assertQ(req(params),
|
||||
"//result[@numFound='0']"
|
||||
);
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1 mindf=1}26");
|
||||
params.set(CommonParams.DEBUG, "true");
|
||||
assertQ(req(params),
|
||||
"//result[@numFound='3']"
|
||||
);
|
||||
|
|
Loading…
Reference in New Issue