mirror of https://github.com/apache/lucene.git
SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1406437 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
debe67b60b
commit
d93f599e32
|
@ -146,6 +146,9 @@ Bug Fixes
|
|||
thrown by PingRequestHandler. Do not log exceptions if a user tries to view a
|
||||
hidden file using ShowFileRequestHandler. (Tomás Fernández Löbbe via James Dyer)
|
||||
|
||||
* SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token.
|
||||
(Tom Burton-West, Robert Muir)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -243,6 +243,8 @@ class ExtendedDismaxQParser extends QParser {
|
|||
// For correct lucene queries, turn off mm processing if there
|
||||
// were explicit operators (except for AND).
|
||||
boolean doMinMatched = (numOR + numNOT + numPluses + numMinuses) == 0;
|
||||
// but always for unstructured implicit bqs created by getFieldQuery
|
||||
up.minShouldMatch = minShouldMatch;
|
||||
|
||||
try {
|
||||
up.setRemoveStopFilter(!stopwords);
|
||||
|
@ -888,6 +890,7 @@ class ExtendedDismaxQParser extends QParser {
|
|||
|
||||
private Map<String, Analyzer> nonStopFilterAnalyzerPerField;
|
||||
private boolean removeStopFilter;
|
||||
String minShouldMatch; // for inner boolean queries produced from a single fieldQuery
|
||||
|
||||
/**
|
||||
* Where we store a map from field name we expect to see in our query
|
||||
|
@ -1161,6 +1164,18 @@ class ExtendedDismaxQParser extends QParser {
|
|||
case FIELD: // fallthrough
|
||||
case PHRASE:
|
||||
Query query = super.getFieldQuery(field, val, type == QType.PHRASE);
|
||||
// A BooleanQuery is only possible from getFieldQuery if it came from
|
||||
// a single whitespace separated term. In this case, check the coordination
|
||||
// factor on the query: if its enabled, that means we aren't a set of synonyms
|
||||
// but instead multiple terms from one whitespace-separated term, we must
|
||||
// apply minShouldMatch here so that it works correctly with other things
|
||||
// like aliasing.
|
||||
if (query instanceof BooleanQuery) {
|
||||
BooleanQuery bq = (BooleanQuery) query;
|
||||
if (!bq.isCoordDisabled()) {
|
||||
SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch);
|
||||
}
|
||||
}
|
||||
if (query instanceof PhraseQuery) {
|
||||
PhraseQuery pq = (PhraseQuery)query;
|
||||
if (minClauseSize > 1 && pq.getTerms().length < minClauseSize) return null;
|
||||
|
|
|
@ -60,6 +60,13 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase {
|
|||
assertU(adoc("id", "52", "text_sw", "tekna theou klethomen"));
|
||||
assertU(adoc("id", "53", "text_sw", "nun tekna theou esmen"));
|
||||
assertU(adoc("id", "54", "text_sw", "phanera estin ta tekna tou theou"));
|
||||
assertU(adoc("id", "55", "standardtok", "大"));
|
||||
assertU(adoc("id", "56", "standardtok", "大亚"));
|
||||
assertU(adoc("id", "57", "standardtok", "大亚湾"));
|
||||
assertU(adoc("id", "58", "HTMLstandardtok", "大"));
|
||||
assertU(adoc("id", "59", "HTMLstandardtok", "大亚"));
|
||||
assertU(adoc("id", "60", "HTMLstandardtok", "大亚湾"));
|
||||
assertU(adoc("id", "61", "text_sw", "bazaaa")); // synonyms in an expansion group
|
||||
assertU(commit());
|
||||
}
|
||||
@Override
|
||||
|
@ -774,4 +781,153 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase {
|
|||
, "*[count(//doc)=1]");
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token
|
||||
*/
|
||||
public void testCJK() throws Exception {
|
||||
assertQ("test cjk (disjunction)",
|
||||
req("q", "大亚湾",
|
||||
"qf", "standardtok",
|
||||
"mm", "0%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=3]");
|
||||
assertQ("test cjk (minShouldMatch)",
|
||||
req("q", "大亚湾",
|
||||
"qf", "standardtok",
|
||||
"mm", "67%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=2]");
|
||||
assertQ("test cjk (conjunction)",
|
||||
req("q", "大亚湾",
|
||||
"qf", "standardtok",
|
||||
"mm", "100%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
}
|
||||
|
||||
/**
|
||||
* test that minShouldMatch works with aliasing
|
||||
* for implicit boolean queries
|
||||
*/
|
||||
public void testCJKAliasing() throws Exception {
|
||||
// single field
|
||||
assertQ("test cjk (aliasing+disjunction)",
|
||||
req("q", "myalias:大亚湾",
|
||||
"f.myalias.qf", "standardtok",
|
||||
"mm", "0%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=3]");
|
||||
assertQ("test cjk (aliasing+minShouldMatch)",
|
||||
req("q", "myalias:大亚湾",
|
||||
"f.myalias.qf", "standardtok",
|
||||
"mm", "67%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=2]");
|
||||
assertQ("test cjk (aliasing+conjunction)",
|
||||
req("q", "myalias:大亚湾",
|
||||
"f.myalias.qf", "standardtok",
|
||||
"mm", "100%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
// multifield
|
||||
assertQ("test cjk (aliasing+disjunction)",
|
||||
req("q", "myalias:大亚湾",
|
||||
"f.myalias.qf", "standardtok HTMLstandardtok",
|
||||
"mm", "0%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=6]");
|
||||
assertQ("test cjk (aliasing+minShouldMatch)",
|
||||
req("q", "myalias:大亚湾",
|
||||
"f.myalias.qf", "standardtok HTMLstandardtok",
|
||||
"mm", "67%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=4]");
|
||||
assertQ("test cjk (aliasing+conjunction)",
|
||||
req("q", "myalias:大亚湾",
|
||||
"f.myalias.qf", "standardtok HTMLstandardtok",
|
||||
"mm", "100%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=2]");
|
||||
}
|
||||
|
||||
/** Test that we apply boosts correctly */
|
||||
public void testCJKBoosts() throws Exception {
|
||||
assertQ("test cjk (disjunction)",
|
||||
req("q", "大亚湾",
|
||||
"qf", "standardtok^2 HTMLstandardtok",
|
||||
"mm", "0%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=6]", "//result/doc[1]/str[@name='id'][.='57']");
|
||||
assertQ("test cjk (minShouldMatch)",
|
||||
req("q", "大亚湾",
|
||||
"qf", "standardtok^2 HTMLstandardtok",
|
||||
"mm", "67%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=4]", "//result/doc[1]/str[@name='id'][.='57']");
|
||||
assertQ("test cjk (conjunction)",
|
||||
req("q", "大亚湾",
|
||||
"qf", "standardtok^2 HTMLstandardtok",
|
||||
"mm", "100%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=2]", "//result/doc[1]/str[@name='id'][.='57']");
|
||||
|
||||
// now boost the other field
|
||||
assertQ("test cjk (disjunction)",
|
||||
req("q", "大亚湾",
|
||||
"qf", "standardtok HTMLstandardtok^2",
|
||||
"mm", "0%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=6]", "//result/doc[1]/str[@name='id'][.='60']");
|
||||
assertQ("test cjk (minShouldMatch)",
|
||||
req("q", "大亚湾",
|
||||
"qf", "standardtok HTMLstandardtok^2",
|
||||
"mm", "67%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=4]", "//result/doc[1]/str[@name='id'][.='60']");
|
||||
assertQ("test cjk (conjunction)",
|
||||
req("q", "大亚湾",
|
||||
"qf", "standardtok HTMLstandardtok^2",
|
||||
"mm", "100%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=2]", "//result/doc[1]/str[@name='id'][.='60']");
|
||||
}
|
||||
|
||||
/** always apply minShouldMatch to the inner booleanqueries
|
||||
* created from whitespace, as these are never structured lucene queries
|
||||
* but only come from unstructured text */
|
||||
public void testCJKStructured() throws Exception {
|
||||
assertQ("test cjk (disjunction)",
|
||||
req("q", "大亚湾 OR bogus",
|
||||
"qf", "standardtok",
|
||||
"mm", "0%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=3]");
|
||||
assertQ("test cjk (minShouldMatch)",
|
||||
req("q", "大亚湾 OR bogus",
|
||||
"qf", "standardtok",
|
||||
"mm", "67%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=2]");
|
||||
assertQ("test cjk (conjunction)",
|
||||
req("q", "大亚湾 OR bogus",
|
||||
"qf", "standardtok",
|
||||
"mm", "100%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that we don't apply minShouldMatch to the inner boolean queries
|
||||
* when there are synonyms (these are indicated by coordination factor)
|
||||
*/
|
||||
public void testSynonyms() throws Exception {
|
||||
// document only contains baraaa, but should still match.
|
||||
assertQ("test synonyms",
|
||||
req("q", "fooaaa",
|
||||
"qf", "text_sw",
|
||||
"mm", "100%",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue