mirror of https://github.com/apache/lucene.git
SOLR-3085: New edismax param mm.autoRelax which helps in certain cases of the stopwords/zero-hits issue
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693833 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0efca23e6e
commit
4a63a328d4
|
@ -50,6 +50,11 @@ Upgrading from Solr 5.x
|
|||
Detailed Change List
|
||||
----------------------
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
|
||||
* SOLR-3085: New edismax param mm.autoRelax which helps in certain cases of the stopwords/zero-hits issue (janhoy)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -243,7 +243,8 @@ public class DisMaxQParser extends QParser {
|
|||
if (dis instanceof BooleanQuery) {
|
||||
BooleanQuery.Builder t = new BooleanQuery.Builder();
|
||||
SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery) dis);
|
||||
SolrPluginUtils.setMinShouldMatch(t, minShouldMatch);
|
||||
boolean mmAutoRelax = params.getBool(DisMaxParams.MM_AUTORELAX, false);
|
||||
SolrPluginUtils.setMinShouldMatch(t, minShouldMatch, mmAutoRelax);
|
||||
query = t.build();
|
||||
}
|
||||
return query;
|
||||
|
|
|
@ -299,7 +299,7 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
if (query instanceof BooleanQuery) {
|
||||
BooleanQuery.Builder t = new BooleanQuery.Builder();
|
||||
SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery)query);
|
||||
SolrPluginUtils.setMinShouldMatch(t, config.minShouldMatch);
|
||||
SolrPluginUtils.setMinShouldMatch(t, config.minShouldMatch, config.mmAutoRelax);
|
||||
query = t.build();
|
||||
}
|
||||
return query;
|
||||
|
@ -341,7 +341,7 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
// were explicit operators (except for AND).
|
||||
boolean doMinMatched = doMinMatched(clauses, config.lowercaseOperators);
|
||||
if (doMinMatched && query instanceof BooleanQuery) {
|
||||
query = SolrPluginUtils.setMinShouldMatch((BooleanQuery)query, config.minShouldMatch);
|
||||
query = SolrPluginUtils.setMinShouldMatch((BooleanQuery)query, config.minShouldMatch, config.mmAutoRelax);
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
@ -1238,7 +1238,7 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
if (query instanceof BooleanQuery) {
|
||||
BooleanQuery bq = (BooleanQuery) query;
|
||||
if (!bq.isCoordDisabled()) {
|
||||
query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch);
|
||||
query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch, false);
|
||||
}
|
||||
}
|
||||
if (query instanceof PhraseQuery) {
|
||||
|
@ -1490,6 +1490,8 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
protected int qslop;
|
||||
|
||||
protected boolean stopwords;
|
||||
|
||||
protected boolean mmAutoRelax;
|
||||
|
||||
protected String altQ;
|
||||
|
||||
|
@ -1527,6 +1529,8 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
qslop = solrParams.getInt(DisMaxParams.QS, 0);
|
||||
|
||||
stopwords = solrParams.getBool(DMP.STOPWORDS, true);
|
||||
|
||||
mmAutoRelax = solrParams.getBool(DMP.MM_AUTORELAX, false);
|
||||
|
||||
altQ = solrParams.get( DisMaxParams.ALTQ );
|
||||
|
||||
|
|
|
@ -560,11 +560,14 @@ public class SolrPluginUtils {
|
|||
return out;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks the number of optional clauses in the query, and compares it
|
||||
* with the specification string to determine the proper value to use.
|
||||
*
|
||||
* <p>
|
||||
* If mmAutoRelax=true, we'll perform auto relaxation of mm if tokens
|
||||
* are removed from some but not all DisMax clauses, as can happen when
|
||||
* stopwords or punctuation tokens are removed in analysis.
|
||||
* </p>
|
||||
* <p>
|
||||
* Details about the specification format can be found
|
||||
* <a href="doc-files/min-should-match.html">here</a>
|
||||
|
@ -589,29 +592,54 @@ public class SolrPluginUtils {
|
|||
* <p>:TODO: should optimize the case where number is same
|
||||
* as clauses to just make them all "required"
|
||||
* </p>
|
||||
*
|
||||
* @param q The query as a BooleanQuery.Builder
|
||||
* @param spec The mm spec
|
||||
* @param mmAutoRelax whether to perform auto relaxation of mm if tokens are removed from some but not all DisMax clauses
|
||||
*/
|
||||
public static void setMinShouldMatch(BooleanQuery.Builder q, String spec) {
|
||||
public static void setMinShouldMatch(BooleanQuery.Builder q, String spec, boolean mmAutoRelax) {
|
||||
|
||||
int optionalClauses = 0;
|
||||
int maxDisjunctsSize = 0;
|
||||
int optionalDismaxClauses = 0;
|
||||
for (BooleanClause c : q.build().clauses()) {
|
||||
if (c.getOccur() == Occur.SHOULD) {
|
||||
optionalClauses++;
|
||||
if (mmAutoRelax && c.getQuery() instanceof DisjunctionMaxQuery) {
|
||||
int numDisjuncts = ((DisjunctionMaxQuery)c.getQuery()).getDisjuncts().size();
|
||||
if (numDisjuncts>maxDisjunctsSize) {
|
||||
maxDisjunctsSize = numDisjuncts;
|
||||
optionalDismaxClauses = 1;
|
||||
}
|
||||
else if (numDisjuncts == maxDisjunctsSize) {
|
||||
optionalDismaxClauses++;
|
||||
}
|
||||
} else {
|
||||
optionalClauses++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int msm = calculateMinShouldMatch(optionalClauses, spec);
|
||||
int msm = calculateMinShouldMatch(optionalClauses + optionalDismaxClauses, spec);
|
||||
if (0 < msm) {
|
||||
q.setMinimumNumberShouldMatch(msm);
|
||||
}
|
||||
}
|
||||
|
||||
public static void setMinShouldMatch(BooleanQuery.Builder q, String spec) {
|
||||
setMinShouldMatch(q, spec, false);
|
||||
}
|
||||
|
||||
public static BooleanQuery setMinShouldMatch(BooleanQuery q, String spec) {
|
||||
return setMinShouldMatch(q, spec, false);
|
||||
}
|
||||
|
||||
public static BooleanQuery setMinShouldMatch(BooleanQuery q, String spec, boolean mmAutoRelax) {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
builder.setDisableCoord(q.isCoordDisabled());
|
||||
for (BooleanClause clause : q) {
|
||||
builder.add(clause);
|
||||
}
|
||||
setMinShouldMatch(builder, spec);
|
||||
setMinShouldMatch(builder, spec, mmAutoRelax);
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
|
|
|
@ -390,6 +390,66 @@ public class SolrPluginUtilsTest extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMinShouldMatchAutoRelax() {
|
||||
/* The basics should not be affected by autoRelax */
|
||||
BooleanQuery.Builder q = new BooleanQuery.Builder();
|
||||
q.add(new TermQuery(new Term("a","b")), Occur.SHOULD);
|
||||
q.add(new TermQuery(new Term("a","c")), Occur.SHOULD);
|
||||
q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
|
||||
q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
|
||||
|
||||
SolrPluginUtils.setMinShouldMatch(q, "0", true);
|
||||
assertEquals(0, q.build().getMinimumNumberShouldMatch());
|
||||
|
||||
SolrPluginUtils.setMinShouldMatch(q, "1", true);
|
||||
assertEquals(1, q.build().getMinimumNumberShouldMatch());
|
||||
|
||||
SolrPluginUtils.setMinShouldMatch(q, "50%", true);
|
||||
assertEquals(2, q.build().getMinimumNumberShouldMatch());
|
||||
|
||||
SolrPluginUtils.setMinShouldMatch(q, "99", true);
|
||||
assertEquals(4, q.build().getMinimumNumberShouldMatch());
|
||||
|
||||
q.add(new TermQuery(new Term("a","e")), Occur.MUST);
|
||||
q.add(new TermQuery(new Term("a","f")), Occur.MUST);
|
||||
|
||||
SolrPluginUtils.setMinShouldMatch(q, "50%", true);
|
||||
assertEquals(2, q.build().getMinimumNumberShouldMatch());
|
||||
|
||||
/* Simulate stopwords through uneven disjuncts */
|
||||
q = new BooleanQuery.Builder();
|
||||
DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(0.0f);
|
||||
dmq.add(new TermQuery(new Term("a","foo")));
|
||||
q.add(dmq, Occur.SHOULD);
|
||||
dmq = new DisjunctionMaxQuery(0.0f);
|
||||
dmq.add(new TermQuery(new Term("a","foo")));
|
||||
dmq.add(new TermQuery(new Term("b","foo")));
|
||||
q.add(dmq, Occur.SHOULD);
|
||||
dmq = new DisjunctionMaxQuery(0.0f);
|
||||
dmq.add(new TermQuery(new Term("a","bar")));
|
||||
dmq.add(new TermQuery(new Term("b","bar")));
|
||||
q.add(dmq, Occur.SHOULD);
|
||||
|
||||
// Without relax
|
||||
SolrPluginUtils.setMinShouldMatch(q, "100%", false);
|
||||
assertEquals(3, q.build().getMinimumNumberShouldMatch());
|
||||
|
||||
// With relax
|
||||
SolrPluginUtils.setMinShouldMatch(q, "100%", true);
|
||||
assertEquals(2, q.build().getMinimumNumberShouldMatch());
|
||||
|
||||
// Still same result with a MUST clause extra
|
||||
q.add(new TermQuery(new Term("a","must")), Occur.MUST);
|
||||
SolrPluginUtils.setMinShouldMatch(q, "100%", true);
|
||||
assertEquals(2, q.build().getMinimumNumberShouldMatch());
|
||||
|
||||
// Combination of dismax and non-dismax SHOULD clauses
|
||||
q.add(new TermQuery(new Term("b","should")), Occur.SHOULD);
|
||||
SolrPluginUtils.setMinShouldMatch(q, "100%", true);
|
||||
assertEquals(3, q.build().getMinimumNumberShouldMatch());
|
||||
}
|
||||
|
||||
/** macro */
|
||||
public String pe(CharSequence s) {
|
||||
return SolrPluginUtils.partialEscape(s).toString();
|
||||
|
|
|
@ -42,7 +42,12 @@ public interface DisMaxParams {
|
|||
|
||||
/** query and init param for MinShouldMatch specification */
|
||||
public static String MM = "mm";
|
||||
|
||||
|
||||
/**
|
||||
* If set to true, will try to reduce MM if tokens are removed from some clauses but not all
|
||||
*/
|
||||
public static String MM_AUTORELAX = "mm.autoRelax";
|
||||
|
||||
/**
|
||||
* query and init param for Phrase Slop value in phrase
|
||||
* boost query (in pf fields)
|
||||
|
|
Loading…
Reference in New Issue