Bugfix for FuzzyQuery false negative (#1493)

Fix for Jira issue 9365, where a search for `abc` does not match the document `abcd` when prefixLength = 3 and the maximum edit distance = 1.
The fix is to rewrite the FuzzyQuery as a regular-expression query whenever the prefix length equals the length of the search string.
This commit is contained in:
markharwood 2020-05-07 15:10:00 +01:00 committed by GitHub
parent d06294e6ab
commit 28e47549c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 1 deletions

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SingleTermsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
@ -99,9 +100,22 @@ public class FuzzyQuery extends MultiTermQuery {
this.prefixLength = prefixLength;
this.transpositions = transpositions;
this.maxExpansions = maxExpansions;
setRewriteMethod(new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(maxExpansions));
if (term.text().length() == prefixLength) {
setRewriteAsRegExpQuery();
} else {
setRewriteMethod(new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(maxExpansions));
}
}
/**
 * Installs a rewrite method that replaces this query with a {@link RegexpQuery}
 * matching the literal term text followed by between zero and {@code maxEdits}
 * arbitrary characters.
 *
 * <p>The term text is escaped before being embedded in the pattern so that
 * characters which carry meaning in the regular-expression syntax are matched
 * literally instead of being interpreted as operators.
 */
private void setRewriteAsRegExpQuery() {
  setRewriteMethod(new RewriteMethod() {
    @Override
    public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
      // Literal prefix, then up to maxEdits trailing characters of any kind.
      String pattern = escapeRegExpSyntax(term.text()) + ".{0," + maxEdits + "}";
      return new RegexpQuery(new Term(term.field(), pattern));
    }
  });
}

/**
 * Backslash-escapes every character of {@code text} that is an operator in the
 * regular-expression syntax, so the result matches {@code text} literally.
 *
 * @param text raw term text
 * @return {@code text} with each reserved character preceded by a backslash
 */
private static String escapeRegExpSyntax(String text) {
  // Core operators plus the optional ones (# @ & < > ~); all are ASCII, so
  // iterating by char cannot split or misclassify a surrogate pair.
  final String reserved = ".?+*|{}[]()\"\\#@&<>~";
  StringBuilder escaped = new StringBuilder(text.length() + 8);
  for (int i = 0; i < text.length(); i++) {
    char c = text.charAt(i);
    if (reserved.indexOf(c) >= 0) {
      escaped.append('\\');
    }
    escaped.append(c);
  }
  return escaped.toString();
}
/**
* Calls {@link #FuzzyQuery(Term, int, int, int, boolean)
* FuzzyQuery(term, maxEdits, prefixLength, defaultMaxExpansions, defaultTranspositions)}.
@ -166,6 +180,8 @@ public class FuzzyQuery extends MultiTermQuery {
}
}
}
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {

View File

@ -72,7 +72,33 @@ public class TestFuzzyQuery extends LuceneTestCase {
reader.close();
directory.close();
}
/**
 * Checks fuzzy matching when the prefix length equals the length of the search
 * term: the three-character term "bba" is searched with a prefix length of 3
 * against the indexed values "bbab", "bbabc" and "bbabcd". One hit is expected
 * with a maximum of one edit, and two hits with a maximum of two edits.
 */
public void testPrefixLengthEqualStringLength() throws Exception {
  final int prefixLength = 3;
  final Term searchTerm = new Term("field", "bba");

  Directory dir = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir);
  for (String value : new String[] {"bbab", "bbabc", "bbabcd"}) {
    addDoc(value, indexWriter);
  }
  IndexReader indexReader = indexWriter.getReader();
  IndexSearcher indexSearcher = newSearcher(indexReader);
  indexWriter.close();

  // A single allowed edit.
  FuzzyQuery oneEdit = new FuzzyQuery(searchTerm, 1, prefixLength);
  ScoreDoc[] oneEditHits = indexSearcher.search(oneEdit, 1000).scoreDocs;
  assertEquals(1, oneEditHits.length);

  // Two allowed edits.
  FuzzyQuery twoEdits = new FuzzyQuery(searchTerm, 2, prefixLength);
  ScoreDoc[] twoEditHits = indexSearcher.search(twoEdits, 1000).scoreDocs;
  assertEquals(2, twoEditHits.length);

  indexReader.close();
  dir.close();
}
public void testFuzziness() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);