mirror of https://github.com/apache/lucene.git
Bugfix for FuzzyQuery false negative (#1493)
Fix for Jira issue LUCENE-9365, where a search for `abc` does not match the document `abcd` when prefixLength = 3 and the maximum edit distance = 1. The fix is to rewrite the FuzzyQuery as a RegexpQuery when the prefix length equals the length of the search string.
This commit is contained in:
parent
d06294e6ab
commit
28e47549c8
|
@ -20,6 +20,7 @@ package org.apache.lucene.search;
|
|||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.SingleTermsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
|
@ -99,9 +100,22 @@ public class FuzzyQuery extends MultiTermQuery {
|
|||
this.prefixLength = prefixLength;
|
||||
this.transpositions = transpositions;
|
||||
this.maxExpansions = maxExpansions;
|
||||
setRewriteMethod(new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(maxExpansions));
|
||||
if (term.text().length() == prefixLength) {
|
||||
setRewriteAsRegExpQuery();
|
||||
} else {
|
||||
setRewriteMethod(new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(maxExpansions));
|
||||
}
|
||||
}
|
||||
|
||||
private void setRewriteAsRegExpQuery() {
|
||||
setRewriteMethod(new RewriteMethod() {
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
|
||||
return new RegexpQuery(new Term(term.field(), term.text() + ".{0," + maxEdits + "}"));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #FuzzyQuery(Term, int, int, int, boolean)
|
||||
* FuzzyQuery(term, maxEdits, prefixLength, defaultMaxExpansions, defaultTranspositions)}.
|
||||
|
@ -166,6 +180,8 @@ public class FuzzyQuery extends MultiTermQuery {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||
|
|
|
@ -72,7 +72,33 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testPrefixLengthEqualStringLength() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
||||
addDoc("bbab", writer);
|
||||
addDoc("bbabc", writer);
|
||||
addDoc("bbabcd", writer);
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
writer.close();
|
||||
|
||||
int maxEdits = 1;
|
||||
int prefixLength = 3;
|
||||
FuzzyQuery query = new FuzzyQuery(new Term("field", "bba"), maxEdits, prefixLength);
|
||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
|
||||
maxEdits = 2;
|
||||
query = new FuzzyQuery(new Term("field", "bba"), maxEdits, prefixLength);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(2, hits.length);
|
||||
|
||||
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testFuzziness() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
||||
|
|
Loading…
Reference in New Issue