mirror of https://github.com/apache/lucene.git
Bugfix for FuzzyQuery false negative (#1493)
Fix for Jira issue 9365, where a search for `abc` doesn't match the doc `abcd` if prefixLength = 3 and edit distance = 1. The fix is to rewrite the FuzzyQuery as a regexp query if the prefix length equals the search string length.
This commit is contained in:
parent
d06294e6ab
commit
28e47549c8
|
@ -20,6 +20,7 @@ package org.apache.lucene.search;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.SingleTermsEnum;
|
import org.apache.lucene.index.SingleTermsEnum;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
|
@ -99,9 +100,22 @@ public class FuzzyQuery extends MultiTermQuery {
|
||||||
this.prefixLength = prefixLength;
|
this.prefixLength = prefixLength;
|
||||||
this.transpositions = transpositions;
|
this.transpositions = transpositions;
|
||||||
this.maxExpansions = maxExpansions;
|
this.maxExpansions = maxExpansions;
|
||||||
setRewriteMethod(new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(maxExpansions));
|
if (term.text().length() == prefixLength) {
|
||||||
|
setRewriteAsRegExpQuery();
|
||||||
|
} else {
|
||||||
|
setRewriteMethod(new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(maxExpansions));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void setRewriteAsRegExpQuery() {
|
||||||
|
setRewriteMethod(new RewriteMethod() {
|
||||||
|
@Override
|
||||||
|
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
|
||||||
|
return new RegexpQuery(new Term(term.field(), term.text() + ".{0," + maxEdits + "}"));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calls {@link #FuzzyQuery(Term, int, int, int, boolean)
|
* Calls {@link #FuzzyQuery(Term, int, int, int, boolean)
|
||||||
* FuzzyQuery(term, maxEdits, prefixLength, defaultMaxExpansions, defaultTranspositions)}.
|
* FuzzyQuery(term, maxEdits, prefixLength, defaultMaxExpansions, defaultTranspositions)}.
|
||||||
|
@ -166,6 +180,8 @@ public class FuzzyQuery extends MultiTermQuery {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||||
|
|
|
@ -72,7 +72,33 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
||||||
reader.close();
|
reader.close();
|
||||||
directory.close();
|
directory.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testPrefixLengthEqualStringLength() throws Exception {
|
||||||
|
Directory directory = newDirectory();
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
||||||
|
addDoc("bbab", writer);
|
||||||
|
addDoc("bbabc", writer);
|
||||||
|
addDoc("bbabcd", writer);
|
||||||
|
IndexReader reader = writer.getReader();
|
||||||
|
IndexSearcher searcher = newSearcher(reader);
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
int maxEdits = 1;
|
||||||
|
int prefixLength = 3;
|
||||||
|
FuzzyQuery query = new FuzzyQuery(new Term("field", "bba"), maxEdits, prefixLength);
|
||||||
|
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
||||||
|
assertEquals(1, hits.length);
|
||||||
|
|
||||||
|
maxEdits = 2;
|
||||||
|
query = new FuzzyQuery(new Term("field", "bba"), maxEdits, prefixLength);
|
||||||
|
hits = searcher.search(query, 1000).scoreDocs;
|
||||||
|
assertEquals(2, hits.length);
|
||||||
|
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
|
||||||
public void testFuzziness() throws Exception {
|
public void testFuzziness() throws Exception {
|
||||||
Directory directory = newDirectory();
|
Directory directory = newDirectory();
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
||||||
|
|
Loading…
Reference in New Issue