SOLR-8313: SimpleQueryParser doesn't use MultiTermAnalysis for Fuzzy Queries. Thanks Tom

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1718233 13f79535-47bb-0310-9956-ffa450edef68
2015-12-06 22:41:55 +00:00 · 2015-12-06 22:41:55 +00:00 · ceb66d34c0
parent 0bc10ecb72
commit ceb66d34c0
3 changed files with 42 additions and 0 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -232,6 +232,8 @@ Other Changes

 * SOLR-8353: Support regex for skipping license checksums (Gregory Chanan)

+* SOLR-8313: SimpleQueryParser doesn't use MultiTermAnalysis for Fuzzy Queries (Tom Hill via Erick Erickson)
+
 ==================  5.4.0 ==================

 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
--- a/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java
@ -18,10 +18,12 @@ package org.apache.solr.search;
 */

 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.queryparser.simple.SimpleQueryParser;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SimpleParams;
@ -216,6 +218,36 @@ public class SimpleQParserPlugin extends QParserPlugin {
      return simplify(bq.build());
    }

+    @Override
+    protected Query newFuzzyQuery(String text, int fuzziness) { // builds the fuzzy query across every weighted field
+      BooleanQuery.Builder bq = new BooleanQuery.Builder();
+      bq.setDisableCoord(true);
+      // Add one optional (SHOULD) fuzzy clause for each field/weight entry in `weights`.
+      for (Map.Entry<String, Float> entry : weights.entrySet()) {
+        String field = entry.getKey();
+        FieldType type = schema.getFieldType(field);
+        Query fuzzy;
+        // The term text used for the FuzzyQuery depends on the field's type.
+        if (type instanceof TextField) {
+          // TextField: pass the raw text through the type's multi-term analyzer and build the fuzzy term from the analyzed form.
+          Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
+          String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString();
+          fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness);
+        } else {
+          // Any other field type: use the query text verbatim, with no analysis.
+          fuzzy = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness);
+        }
+        // Wrap the clause in a BoostQuery only when the field's weight differs from 1.
+        float boost = entry.getValue();
+        if (boost != 1f) {
+          fuzzy = new BoostQuery(fuzzy, boost);
+        }
+        bq.add(fuzzy, BooleanClause.Occur.SHOULD);
+      }
+      // Hand the assembled boolean query to simplify() before returning it.
+      return simplify(bq.build());
+    }
+

  }
 }
--- a/solr/core/src/test/org/apache/solr/search/TestSimpleQParserPlugin.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSimpleQParserPlugin.java
@ -220,6 +220,14 @@ public class TestSimpleQParserPlugin extends SolrTestCaseJ4 {
    assertJQ(req("defType", "simple", "qf", "text0", "q", "BAR*"), "/response/numFound==0");
  }

+  /**
+   * Test that multiterm analysis chain is used for fuzzy.
+   *
+   * Runs fuzzy queries against field {@code text0} through the {@code simple} parser:
+   * the upper-case ({@code FOOBAT~1}, {@code FOOB~2}) and accented ({@code Fóóba~1})
+   * inputs are each expected to match exactly one document, while {@code BAR~1} is
+   * expected to match none.
+   * NOTE(review): the indexed documents are set up outside this hunk; confirm the
+   * expected counts against the test's fixture data.
+   */
+  public void testFuzzyChain() throws Exception {
+    assertJQ(req("defType", "simple", "qf", "text0", "q", "FOOBAT~1"), "/response/numFound==1");
+    assertJQ(req("defType", "simple", "qf", "text0", "q", "Fóóba~1"), "/response/numFound==1");
+    assertJQ(req("defType", "simple", "qf", "text0", "q", "FOOB~2"), "/response/numFound==1");
+    assertJQ(req("defType", "simple", "qf", "text0", "q", "BAR~1"), "/response/numFound==0");
+  }
+
  public void testQueryAnalyzerIsUsed() throws Exception {
    // this should only match one doc, which was lower cased before being added
    assertJQ(req("defType", "simple", "qf", "text-query0", "q", "HELLO"),