SOLR-3085: New edismax param mm.autoRelax which helps in certain cases of the stopwords/zero-hits issue

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693833 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jan Høydahl 2015-08-02 22:31:19 +00:00
parent 0efca23e6e
commit 4a63a328d4
6 changed files with 114 additions and 11 deletions

View File

@ -50,6 +50,11 @@ Upgrading from Solr 5.x
Detailed Change List
----------------------
New Features
----------------------
* SOLR-3085: New edismax param mm.autoRelax which helps in certain cases of the stopwords/zero-hits issue (janhoy)
Other Changes
----------------------

View File

@ -243,7 +243,8 @@ public class DisMaxQParser extends QParser {
if (dis instanceof BooleanQuery) {
BooleanQuery.Builder t = new BooleanQuery.Builder();
SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery) dis);
SolrPluginUtils.setMinShouldMatch(t, minShouldMatch);
boolean mmAutoRelax = params.getBool(DisMaxParams.MM_AUTORELAX, false);
SolrPluginUtils.setMinShouldMatch(t, minShouldMatch, mmAutoRelax);
query = t.build();
}
return query;

View File

@ -299,7 +299,7 @@ public class ExtendedDismaxQParser extends QParser {
if (query instanceof BooleanQuery) {
BooleanQuery.Builder t = new BooleanQuery.Builder();
SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery)query);
SolrPluginUtils.setMinShouldMatch(t, config.minShouldMatch);
SolrPluginUtils.setMinShouldMatch(t, config.minShouldMatch, config.mmAutoRelax);
query = t.build();
}
return query;
@ -341,7 +341,7 @@ public class ExtendedDismaxQParser extends QParser {
// were explicit operators (except for AND).
boolean doMinMatched = doMinMatched(clauses, config.lowercaseOperators);
if (doMinMatched && query instanceof BooleanQuery) {
query = SolrPluginUtils.setMinShouldMatch((BooleanQuery)query, config.minShouldMatch);
query = SolrPluginUtils.setMinShouldMatch((BooleanQuery)query, config.minShouldMatch, config.mmAutoRelax);
}
return query;
}
@ -1238,7 +1238,7 @@ public class ExtendedDismaxQParser extends QParser {
if (query instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) query;
if (!bq.isCoordDisabled()) {
query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch);
query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch, false);
}
}
if (query instanceof PhraseQuery) {
@ -1490,6 +1490,8 @@ public class ExtendedDismaxQParser extends QParser {
protected int qslop;
protected boolean stopwords;
protected boolean mmAutoRelax;
protected String altQ;
@ -1527,6 +1529,8 @@ public class ExtendedDismaxQParser extends QParser {
qslop = solrParams.getInt(DisMaxParams.QS, 0);
stopwords = solrParams.getBool(DMP.STOPWORDS, true);
mmAutoRelax = solrParams.getBool(DMP.MM_AUTORELAX, false);
altQ = solrParams.get( DisMaxParams.ALTQ );

View File

@ -560,11 +560,14 @@ public class SolrPluginUtils {
return out;
}
/**
* Checks the number of optional clauses in the query, and compares it
* with the specification string to determine the proper value to use.
*
* <p>
* If mmAutoRelax=true, we'll perform auto relaxation of mm if tokens
* are removed from some but not all DisMax clauses, as can happen when
* stopwords or punctuation tokens are removed in analysis.
* </p>
* <p>
* Details about the specification format can be found
* <a href="doc-files/min-should-match.html">here</a>
@ -589,29 +592,54 @@ public class SolrPluginUtils {
* <p>:TODO: should optimize the case where number is same
* as clauses to just make them all "required"
* </p>
*
* @param q The query as a BooleanQuery.Builder
* @param spec The mm spec
* @param mmAutoRelax whether to perform auto relaxation of mm if tokens are removed from some but not all DisMax clauses
*/
public static void setMinShouldMatch(BooleanQuery.Builder q, String spec) {
public static void setMinShouldMatch(BooleanQuery.Builder q, String spec, boolean mmAutoRelax) {
int optionalClauses = 0;
// Largest disjunct count seen so far among SHOULD DisjunctionMaxQuery clauses.
int maxDisjunctsSize = 0;
// Number of SHOULD DisjunctionMaxQuery clauses whose disjunct count equals maxDisjunctsSize.
int optionalDismaxClauses = 0;
// q.build() is called here only to read back the clauses added to the builder so far.
for (BooleanClause c : q.build().clauses()) {
if (c.getOccur() == Occur.SHOULD) {
optionalClauses++;
if (mmAutoRelax && c.getQuery() instanceof DisjunctionMaxQuery) {
int numDisjuncts = ((DisjunctionMaxQuery)c.getQuery()).getDisjuncts().size();
if (numDisjuncts>maxDisjunctsSize) {
// New maximum: restart the count, dropping clauses that had fewer disjuncts.
maxDisjunctsSize = numDisjuncts;
optionalDismaxClauses = 1;
}
else if (numDisjuncts == maxDisjunctsSize) {
optionalDismaxClauses++;
}
// Dismax clauses with fewer disjuncts than the current maximum are not counted.
} else {
optionalClauses++;
}
}
}
int msm = calculateMinShouldMatch(optionalClauses, spec);
int msm = calculateMinShouldMatch(optionalClauses + optionalDismaxClauses, spec);
// Only a positive value is written to the builder; for msm <= 0 nothing is set.
if (0 < msm) {
q.setMinimumNumberShouldMatch(msm);
}
}
/**
* Applies the mm spec to the given builder without auto relaxation.
* Delegates to the three-argument overload with mmAutoRelax=false.
*
* @param q The query as a BooleanQuery.Builder
* @param spec The mm spec
*/
public static void setMinShouldMatch(BooleanQuery.Builder q, String spec) {
setMinShouldMatch(q, spec, false);
}
/**
* Applies the mm spec to the given query without auto relaxation.
* Delegates to the three-argument overload with mmAutoRelax=false.
*
* @param q The query as a BooleanQuery
* @param spec The mm spec
* @return the BooleanQuery returned by the three-argument overload
*/
public static BooleanQuery setMinShouldMatch(BooleanQuery q, String spec) {
return setMinShouldMatch(q, spec, false);
}
/**
* Builds a new BooleanQuery from the given one with the mm spec applied.
* The input query is not modified: its coord setting and clauses are copied
* into a fresh builder, and the builder-based overload sets the minimum.
*
* @param q The query as a BooleanQuery
* @param spec The mm spec
* @param mmAutoRelax whether to perform auto relaxation of mm, passed through to the builder-based overload
* @return a newly built BooleanQuery
*/
public static BooleanQuery setMinShouldMatch(BooleanQuery q, String spec, boolean mmAutoRelax) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
// Carry over the coord setting and every clause of the original query.
builder.setDisableCoord(q.isCoordDisabled());
for (BooleanClause clause : q) {
builder.add(clause);
}
setMinShouldMatch(builder, spec);
setMinShouldMatch(builder, spec, mmAutoRelax);
return builder.build();
}

View File

@ -390,6 +390,66 @@ public class SolrPluginUtilsTest extends SolrTestCaseJ4 {
}
/**
* Exercises SolrPluginUtils.setMinShouldMatch with mmAutoRelax enabled.
* The first half uses only TermQuery clauses; the second half uses
* DisjunctionMaxQuery clauses with differing numbers of disjuncts.
* Each builder is reused across calls, so the statements are order-dependent.
*/
@Test
public void testMinShouldMatchAutoRelax() {
/* The basics should not be affected by autoRelax */
BooleanQuery.Builder q = new BooleanQuery.Builder();
// Four SHOULD clauses, none of them a DisjunctionMaxQuery.
q.add(new TermQuery(new Term("a","b")), Occur.SHOULD);
q.add(new TermQuery(new Term("a","c")), Occur.SHOULD);
q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
SolrPluginUtils.setMinShouldMatch(q, "0", true);
assertEquals(0, q.build().getMinimumNumberShouldMatch());
SolrPluginUtils.setMinShouldMatch(q, "1", true);
assertEquals(1, q.build().getMinimumNumberShouldMatch());
// 50% of the four SHOULD clauses.
SolrPluginUtils.setMinShouldMatch(q, "50%", true);
assertEquals(2, q.build().getMinimumNumberShouldMatch());
// A spec above the clause count is expected to give the number of SHOULD clauses.
SolrPluginUtils.setMinShouldMatch(q, "99", true);
assertEquals(4, q.build().getMinimumNumberShouldMatch());
// Adding MUST clauses is expected to leave the result unchanged (still 2 of 4).
q.add(new TermQuery(new Term("a","e")), Occur.MUST);
q.add(new TermQuery(new Term("a","f")), Occur.MUST);
SolrPluginUtils.setMinShouldMatch(q, "50%", true);
assertEquals(2, q.build().getMinimumNumberShouldMatch());
/* Simulate stopwords through uneven disjuncts */
q = new BooleanQuery.Builder();
// First dismax clause has a single disjunct (field "a" only).
DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(0.0f);
dmq.add(new TermQuery(new Term("a","foo")));
q.add(dmq, Occur.SHOULD);
// Second and third dismax clauses have two disjuncts each (fields "a" and "b").
dmq = new DisjunctionMaxQuery(0.0f);
dmq.add(new TermQuery(new Term("a","foo")));
dmq.add(new TermQuery(new Term("b","foo")));
q.add(dmq, Occur.SHOULD);
dmq = new DisjunctionMaxQuery(0.0f);
dmq.add(new TermQuery(new Term("a","bar")));
dmq.add(new TermQuery(new Term("b","bar")));
q.add(dmq, Occur.SHOULD);
// Without relax
// All three SHOULD clauses are counted.
SolrPluginUtils.setMinShouldMatch(q, "100%", false);
assertEquals(3, q.build().getMinimumNumberShouldMatch());
// With relax
// Only the two clauses with the largest disjunct count (2) are counted.
SolrPluginUtils.setMinShouldMatch(q, "100%", true);
assertEquals(2, q.build().getMinimumNumberShouldMatch());
// Still same result with a MUST clause extra
q.add(new TermQuery(new Term("a","must")), Occur.MUST);
SolrPluginUtils.setMinShouldMatch(q, "100%", true);
assertEquals(2, q.build().getMinimumNumberShouldMatch());
// Combination of dismax and non-dismax SHOULD clauses
// The plain SHOULD TermQuery adds one to the two counted dismax clauses.
q.add(new TermQuery(new Term("b","should")), Occur.SHOULD);
SolrPluginUtils.setMinShouldMatch(q, "100%", true);
assertEquals(3, q.build().getMinimumNumberShouldMatch());
}
/** macro */
public String pe(CharSequence s) {
return SolrPluginUtils.partialEscape(s).toString();

View File

@ -42,7 +42,12 @@ public interface DisMaxParams {
/** query and init param for MinShouldMatch specification */
public static String MM = "mm";
/**
* If set to true, mm is automatically reduced when tokens are removed from some
* clauses but not all, as can happen when stopwords are removed in analysis
*/
public static String MM_AUTORELAX = "mm.autoRelax";
/**
* query and init param for Phrase Slop value in phrase
* boost query (in pf fields)