SOLR-8313: SimpleQueryParser doesn't use MultiTermAnalysis for Fuzzy Queries. Thanks Tom

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1718233 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erick Erickson 2015-12-06 22:41:55 +00:00
parent 0bc10ecb72
commit ceb66d34c0
3 changed files with 42 additions and 0 deletions

View File

@ -232,6 +232,8 @@ Other Changes
* SOLR-8353: Support regex for skipping license checksums (Gregory Chanan) * SOLR-8353: Support regex for skipping license checksums (Gregory Chanan)
* SOLR-8313: SimpleQueryParser doesn't use MultiTermAnalysis for Fuzzy Queries (Tom Hill via Erick Erickson)
================== 5.4.0 ================== ================== 5.4.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

View File

@ -18,10 +18,12 @@ package org.apache.solr.search;
*/ */
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.simple.SimpleQueryParser; import org.apache.lucene.queryparser.simple.SimpleQueryParser;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SimpleParams; import org.apache.solr.common.params.SimpleParams;
@ -216,6 +218,36 @@ public class SimpleQParserPlugin extends QParserPlugin {
return simplify(bq.build()); return simplify(bq.build());
} }
/**
 * Builds the fuzzy query for {@code text} across every field listed in {@code weights}.
 *
 * <p>One {@link FuzzyQuery} is created per field and added as a SHOULD clause to a
 * {@link BooleanQuery} whose coord factor is disabled. For fields whose type is a
 * {@link TextField}, the term text is first run through that field type's multi-term
 * analyzer; for every other field type the text is used unchanged. A clause is wrapped
 * in a {@link BoostQuery} only when the field's weight differs from {@code 1f}.
 *
 * @param text the raw fuzzy term text
 * @param fuzziness the fuzziness value handed to each {@link FuzzyQuery}
 * @return the combined query after being passed through {@code simplify}
 */
@Override
protected Query newFuzzyQuery(String text, int fuzziness) {
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.setDisableCoord(true);

  for (Map.Entry<String, Float> fieldWeight : weights.entrySet()) {
    final String fieldName = fieldWeight.getKey();
    final FieldType fieldType = schema.getFieldType(fieldName);

    // Non-TextField types get no analysis: the raw text is the term.
    String termText = text;
    if (fieldType instanceof TextField) {
      // TextField: normalize the term with the multi-term analyzer.
      Analyzer multiTermAnalyzer = ((TextField) fieldType).getMultiTermAnalyzer();
      termText = TextField.analyzeMultiTerm(fieldName, text, multiTermAnalyzer).utf8ToString();
    }

    Query clause = new FuzzyQuery(new Term(fieldName, termText), fuzziness);

    // Only wrap in a BoostQuery when the weight is not the neutral 1.0.
    final float weight = fieldWeight.getValue();
    if (weight != 1f) {
      clause = new BoostQuery(clause, weight);
    }

    builder.add(clause, BooleanClause.Occur.SHOULD);
  }

  return simplify(builder.build());
}
} }
} }

View File

@ -220,6 +220,14 @@ public class TestSimpleQParserPlugin extends SolrTestCaseJ4 {
assertJQ(req("defType", "simple", "qf", "text0", "q", "BAR*"), "/response/numFound==0"); assertJQ(req("defType", "simple", "qf", "text0", "q", "BAR*"), "/response/numFound==0");
} }
/**
 * Test that the multiterm analysis chain is used for fuzzy queries.
 *
 * <p>Each case issues a {@code defType=simple} request against {@code qf=text0} and
 * checks the expected {@code numFound} for the given fuzzy query string.
 */
public void testFuzzyChain() throws Exception {
  // { query string, expected JSON assertion }
  final String[][] cases = {
    {"FOOBAT~1", "/response/numFound==1"},
    {"Fóóba~1", "/response/numFound==1"},
    {"FOOB~2", "/response/numFound==1"},
    {"BAR~1", "/response/numFound==0"},
  };

  for (String[] fuzzyCase : cases) {
    String query = fuzzyCase[0];
    String expectation = fuzzyCase[1];
    assertJQ(req("defType", "simple", "qf", "text0", "q", query), expectation);
  }
}
public void testQueryAnalyzerIsUsed() throws Exception { public void testQueryAnalyzerIsUsed() throws Exception {
// this should only match one doc, which was lower cased before being added // this should only match one doc, which was lower cased before being added
assertJQ(req("defType", "simple", "qf", "text-query0", "q", "HELLO"), assertJQ(req("defType", "simple", "qf", "text-query0", "q", "HELLO"),