From 4a63a328d4c40a22b2dd76a9a06023a80180ac29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B8ydahl?= Date: Sun, 2 Aug 2015 22:31:19 +0000 Subject: [PATCH] SOLR-3085: New edismax param mm.autoRelax which helps in certain cases of the stopwords/zero-hits issue git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1693833 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 5 ++ .../org/apache/solr/search/DisMaxQParser.java | 3 +- .../solr/search/ExtendedDismaxQParser.java | 10 +++- .../org/apache/solr/util/SolrPluginUtils.java | 40 +++++++++++-- .../apache/solr/util/SolrPluginUtilsTest.java | 60 +++++++++++++++++++ .../solr/common/params/DisMaxParams.java | 7 ++- 6 files changed, 114 insertions(+), 11 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 047e84b86b7..35efa8647c9 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -50,6 +50,11 @@ Upgrading from Solr 5.x Detailed Change List ---------------------- +New Features +---------------------- + +* SOLR-3085: New edismax param mm.autoRelax which helps in certain cases of the stopwords/zero-hits issue (janhoy) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java b/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java index 338894feb38..eaee8b80ee4 100644 --- a/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/DisMaxQParser.java @@ -243,7 +243,8 @@ public class DisMaxQParser extends QParser { if (dis instanceof BooleanQuery) { BooleanQuery.Builder t = new BooleanQuery.Builder(); SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery) dis); - SolrPluginUtils.setMinShouldMatch(t, minShouldMatch); + boolean mmAutoRelax = params.getBool(DisMaxParams.MM_AUTORELAX, false); + SolrPluginUtils.setMinShouldMatch(t, minShouldMatch, mmAutoRelax); query = t.build(); } return query; diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index 1acbf64605a..4eef5d5075a 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -299,7 +299,7 @@ public class ExtendedDismaxQParser extends QParser { if (query instanceof BooleanQuery) { BooleanQuery.Builder t = new BooleanQuery.Builder(); SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery)query); - SolrPluginUtils.setMinShouldMatch(t, config.minShouldMatch); + SolrPluginUtils.setMinShouldMatch(t, config.minShouldMatch, config.mmAutoRelax); query = t.build(); } return query; @@ -341,7 +341,7 @@ public class ExtendedDismaxQParser extends QParser { // were explicit operators (except for AND). boolean doMinMatched = doMinMatched(clauses, config.lowercaseOperators); if (doMinMatched && query instanceof BooleanQuery) { - query = SolrPluginUtils.setMinShouldMatch((BooleanQuery)query, config.minShouldMatch); + query = SolrPluginUtils.setMinShouldMatch((BooleanQuery)query, config.minShouldMatch, config.mmAutoRelax); } return query; } @@ -1238,7 +1238,7 @@ public class ExtendedDismaxQParser extends QParser { if (query instanceof BooleanQuery) { BooleanQuery bq = (BooleanQuery) query; if (!bq.isCoordDisabled()) { - query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch); + query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch, false); } } if (query instanceof PhraseQuery) { @@ -1490,6 +1490,8 @@ public class ExtendedDismaxQParser extends QParser { protected int qslop; protected boolean stopwords; + + protected boolean mmAutoRelax; protected String altQ; @@ -1527,6 +1529,8 @@ public class ExtendedDismaxQParser extends QParser { qslop = solrParams.getInt(DisMaxParams.QS, 0); stopwords = solrParams.getBool(DMP.STOPWORDS, true); + + mmAutoRelax = solrParams.getBool(DMP.MM_AUTORELAX, false); altQ = solrParams.get( DisMaxParams.ALTQ ); diff --git a/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java b/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java index 9b807c85eec..1486ca1ce16 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java +++ b/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java @@ -560,11 +560,14 @@ public class SolrPluginUtils { return out; } - /** * Checks the number of optional clauses in the query, and compares it * with the specification string to determine the proper value to use. - * + *

+ * If mmAutoRelax=true, we'll perform auto relaxation of mm if tokens + * are removed from some but not all DisMax clauses, as can happen when + * stopwords or punctuation tokens are removed in analysis. + *

*

* Details about the specification format can be found * here @@ -589,29 +592,54 @@ public class SolrPluginUtils { *

:TODO: should optimize the case where number is same * as clauses to just make them all "required" *

+ * + * @param q The query as a BooleanQuery.Builder + * @param spec The mm spec + * @param mmAutoRelax whether to perform auto relaxation of mm if tokens are removed from some but not all DisMax clauses */ - public static void setMinShouldMatch(BooleanQuery.Builder q, String spec) { + public static void setMinShouldMatch(BooleanQuery.Builder q, String spec, boolean mmAutoRelax) { int optionalClauses = 0; + int maxDisjunctsSize = 0; + int optionalDismaxClauses = 0; for (BooleanClause c : q.build().clauses()) { if (c.getOccur() == Occur.SHOULD) { - optionalClauses++; + if (mmAutoRelax && c.getQuery() instanceof DisjunctionMaxQuery) { + int numDisjuncts = ((DisjunctionMaxQuery)c.getQuery()).getDisjuncts().size(); + if (numDisjuncts>maxDisjunctsSize) { + maxDisjunctsSize = numDisjuncts; + optionalDismaxClauses = 1; + } + else if (numDisjuncts == maxDisjunctsSize) { + optionalDismaxClauses++; + } + } else { + optionalClauses++; + } } } - int msm = calculateMinShouldMatch(optionalClauses, spec); + int msm = calculateMinShouldMatch(optionalClauses + optionalDismaxClauses, spec); if (0 < msm) { q.setMinimumNumberShouldMatch(msm); } } + public static void setMinShouldMatch(BooleanQuery.Builder q, String spec) { + setMinShouldMatch(q, spec, false); + } + public static BooleanQuery setMinShouldMatch(BooleanQuery q, String spec) { + return setMinShouldMatch(q, spec, false); + } + + public static BooleanQuery setMinShouldMatch(BooleanQuery q, String spec, boolean mmAutoRelax) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setDisableCoord(q.isCoordDisabled()); for (BooleanClause clause : q) { builder.add(clause); } - setMinShouldMatch(builder, spec); + setMinShouldMatch(builder, spec, mmAutoRelax); return builder.build(); } diff --git a/solr/core/src/test/org/apache/solr/util/SolrPluginUtilsTest.java b/solr/core/src/test/org/apache/solr/util/SolrPluginUtilsTest.java index dd5a9ff5330..ecc804ec36e 100644 --- a/solr/core/src/test/org/apache/solr/util/SolrPluginUtilsTest.java +++ b/solr/core/src/test/org/apache/solr/util/SolrPluginUtilsTest.java @@ -390,6 +390,66 @@ public class SolrPluginUtilsTest extends SolrTestCaseJ4 { } + @Test + public void testMinShouldMatchAutoRelax() { + /* The basics should not be affected by autoRelax */ + BooleanQuery.Builder q = new BooleanQuery.Builder(); + q.add(new TermQuery(new Term("a","b")), Occur.SHOULD); + q.add(new TermQuery(new Term("a","c")), Occur.SHOULD); + q.add(new TermQuery(new Term("a","d")), Occur.SHOULD); + q.add(new TermQuery(new Term("a","d")), Occur.SHOULD); + + SolrPluginUtils.setMinShouldMatch(q, "0", true); + assertEquals(0, q.build().getMinimumNumberShouldMatch()); + + SolrPluginUtils.setMinShouldMatch(q, "1", true); + assertEquals(1, q.build().getMinimumNumberShouldMatch()); + + SolrPluginUtils.setMinShouldMatch(q, "50%", true); + assertEquals(2, q.build().getMinimumNumberShouldMatch()); + + SolrPluginUtils.setMinShouldMatch(q, "99", true); + assertEquals(4, q.build().getMinimumNumberShouldMatch()); + + q.add(new TermQuery(new Term("a","e")), Occur.MUST); + q.add(new TermQuery(new Term("a","f")), Occur.MUST); + + SolrPluginUtils.setMinShouldMatch(q, "50%", true); + assertEquals(2, q.build().getMinimumNumberShouldMatch()); + + /* Simulate stopwords through uneven disjuncts */ + q = new BooleanQuery.Builder(); + DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(0.0f); + dmq.add(new TermQuery(new Term("a","foo"))); + q.add(dmq, Occur.SHOULD); + dmq = new DisjunctionMaxQuery(0.0f); + dmq.add(new TermQuery(new Term("a","foo"))); + dmq.add(new TermQuery(new Term("b","foo"))); + q.add(dmq, Occur.SHOULD); + dmq = new DisjunctionMaxQuery(0.0f); + dmq.add(new TermQuery(new Term("a","bar"))); + dmq.add(new TermQuery(new Term("b","bar"))); + q.add(dmq, Occur.SHOULD); + + // Without relax + SolrPluginUtils.setMinShouldMatch(q, "100%", false); + assertEquals(3, q.build().getMinimumNumberShouldMatch()); + + // With relax + SolrPluginUtils.setMinShouldMatch(q, "100%", true); + assertEquals(2, q.build().getMinimumNumberShouldMatch()); + + // Still same result with a MUST clause extra + q.add(new TermQuery(new Term("a","must")), Occur.MUST); + SolrPluginUtils.setMinShouldMatch(q, "100%", true); + assertEquals(2, q.build().getMinimumNumberShouldMatch()); + + // Combination of dismax and non-dismax SHOULD clauses + q.add(new TermQuery(new Term("b","should")), Occur.SHOULD); + SolrPluginUtils.setMinShouldMatch(q, "100%", true); + assertEquals(3, q.build().getMinimumNumberShouldMatch()); + } + /** macro */ public String pe(CharSequence s) { return SolrPluginUtils.partialEscape(s).toString(); diff --git a/solr/solrj/src/java/org/apache/solr/common/params/DisMaxParams.java b/solr/solrj/src/java/org/apache/solr/common/params/DisMaxParams.java index c2c268e2875..6440e8781ec 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/DisMaxParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/DisMaxParams.java @@ -42,7 +42,12 @@ public interface DisMaxParams { /** query and init param for MinShouldMatch specification */ public static String MM = "mm"; - + + /** + * If set to true, will try to reduce MM if tokens are removed from some clauses but not all + */ + public static String MM_AUTORELAX = "mm.autoRelax"; + /** * query and init param for Phrase Slop value in phrase * boost query (in pf fields)