mirror of https://github.com/apache/lucene.git
SOLR-8313: SimpleQueryParser doesn't use MultiTermAnalysis for Fuzzy Queries. Thanks Tom
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1718233 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0bc10ecb72
commit
ceb66d34c0
|
@ -232,6 +232,8 @@ Other Changes
|
|||
|
||||
* SOLR-8353: Support regex for skipping license checksums (Gregory Chanan)
|
||||
|
||||
* SOLR-8313: SimpleQueryParser doesn't use MultiTermAnalysis for Fuzzy Queries (Tom Hill via Erick Erickson)
|
||||
|
||||
================== 5.4.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
|
||||
|
|
|
@ -18,10 +18,12 @@ package org.apache.solr.search;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.SimpleParams;
|
||||
|
@ -216,6 +218,36 @@ public class SimpleQParserPlugin extends QParserPlugin {
|
|||
return simplify(bq.build());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query newFuzzyQuery(String text, int fuzziness) {
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
bq.setDisableCoord(true);
|
||||
|
||||
for (Map.Entry<String, Float> entry : weights.entrySet()) {
|
||||
String field = entry.getKey();
|
||||
FieldType type = schema.getFieldType(field);
|
||||
Query fuzzy;
|
||||
|
||||
if (type instanceof TextField) {
|
||||
// If the field type is a TextField then use the multi term analyzer.
|
||||
Analyzer analyzer = ((TextField)type).getMultiTermAnalyzer();
|
||||
String term = TextField.analyzeMultiTerm(field, text, analyzer).utf8ToString();
|
||||
fuzzy = new FuzzyQuery(new Term(entry.getKey(), term), fuzziness);
|
||||
} else {
|
||||
// If the type is *not* a TextField don't do any analysis.
|
||||
fuzzy = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness);
|
||||
}
|
||||
|
||||
float boost = entry.getValue();
|
||||
if (boost != 1f) {
|
||||
fuzzy = new BoostQuery(fuzzy, boost);
|
||||
}
|
||||
bq.add(fuzzy, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
|
||||
return simplify(bq.build());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -220,6 +220,14 @@ public class TestSimpleQParserPlugin extends SolrTestCaseJ4 {
|
|||
assertJQ(req("defType", "simple", "qf", "text0", "q", "BAR*"), "/response/numFound==0");
|
||||
}
|
||||
|
||||
/** Test that multiterm analysis chain is used for fuzzy. */
|
||||
public void testFuzzyChain() throws Exception {
|
||||
assertJQ(req("defType", "simple", "qf", "text0", "q", "FOOBAT~1"), "/response/numFound==1");
|
||||
assertJQ(req("defType", "simple", "qf", "text0", "q", "Fóóba~1"), "/response/numFound==1");
|
||||
assertJQ(req("defType", "simple", "qf", "text0", "q", "FOOB~2"), "/response/numFound==1");
|
||||
assertJQ(req("defType", "simple", "qf", "text0", "q", "BAR~1"), "/response/numFound==0");
|
||||
}
|
||||
|
||||
public void testQueryAnalyzerIsUsed() throws Exception {
|
||||
// this should only match one doc, which was lower cased before being added
|
||||
assertJQ(req("defType", "simple", "qf", "text-query0", "q", "HELLO"),
|
||||
|
|
Loading…
Reference in New Issue