diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 9f37b68ff9e..fccb5f081c3 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -232,6 +232,8 @@ Other Changes * SOLR-8353: Support regex for skipping license checksums (Gregory Chanan) +* SOLR-8313: SimpleQueryParser doesn't use MultiTermAnalysis for Fuzzy Queries (Tom Hill via Erick Erickson) + ================== 5.4.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release diff --git a/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java index 5960e4fa1f3..2797189e2d7 100644 --- a/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/SimpleQParserPlugin.java @@ -18,10 +18,12 @@ package org.apache.solr.search; */ import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.simple.SimpleQueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.Query; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SimpleParams; @@ -216,6 +218,36 @@ public class SimpleQParserPlugin extends QParserPlugin { return simplify(bq.build()); } + @Override + protected Query newFuzzyQuery(String text, int fuzziness) { + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + bq.setDisableCoord(true); + + for (Map.Entry entry : weights.entrySet()) { + String field = entry.getKey(); + FieldType type = schema.getFieldType(field); + Query fuzzy; + + if (type instanceof TextField) { + // If the field type is a TextField then use the multi term analyzer. + Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer(); + String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString(); + fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness); + } else { + // If the type is *not* a TextField don't do any analysis. + fuzzy = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness); + } + + float boost = entry.getValue(); + if (boost != 1f) { + fuzzy = new BoostQuery(fuzzy, boost); + } + bq.add(fuzzy, BooleanClause.Occur.SHOULD); + } + + return simplify(bq.build()); + } + } } diff --git a/solr/core/src/test/org/apache/solr/search/TestSimpleQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestSimpleQParserPlugin.java index 4e62e6e0c3f..3a9047de87a 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSimpleQParserPlugin.java +++ b/solr/core/src/test/org/apache/solr/search/TestSimpleQParserPlugin.java @@ -220,6 +220,14 @@ public class TestSimpleQParserPlugin extends SolrTestCaseJ4 { assertJQ(req("defType", "simple", "qf", "text0", "q", "BAR*"), "/response/numFound==0"); } + /** Test that multiterm analysis chain is used for fuzzy. */ + public void testFuzzyChain() throws Exception { + assertJQ(req("defType", "simple", "qf", "text0", "q", "FOOBAT~1"), "/response/numFound==1"); + assertJQ(req("defType", "simple", "qf", "text0", "q", "Fóóba~1"), "/response/numFound==1"); + assertJQ(req("defType", "simple", "qf", "text0", "q", "FOOB~2"), "/response/numFound==1"); + assertJQ(req("defType", "simple", "qf", "text0", "q", "BAR~1"), "/response/numFound==0"); + } + public void testQueryAnalyzerIsUsed() throws Exception { // this should only match one doc, which was lower cased before being added assertJQ(req("defType", "simple", "qf", "text-query0", "q", "HELLO"),