mirror of https://github.com/apache/lucene.git
LUCENE-7439: FuzzyQuery now matches all terms within the specified edit distance, even if they are short
This commit is contained in:
parent
bd9962aba6
commit
471f90cf82
|
@ -32,6 +32,9 @@ Bug Fixes
|
|||
|
||||
Improvements
|
||||
|
||||
* LUCENE-7439: FuzzyQuery now matches all terms within the specified
|
||||
edit distance, even if they are short terms (Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
Other
|
||||
|
|
|
@ -350,7 +350,7 @@ public class FuzzyTermsEnum extends TermsEnum {
|
|||
final int codePointCount = UnicodeUtil.codePointCount(term);
|
||||
final float similarity = 1.0f - ((float) ed / (float)
|
||||
(Math.min(codePointCount, termLength)));
|
||||
if (similarity > minSimilarity) {
|
||||
if (minSimilarity == 0 || similarity > minSimilarity) {
|
||||
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
|
||||
//System.out.println(" yes");
|
||||
return AcceptStatus.YES;
|
||||
|
|
|
@ -160,7 +160,9 @@ public abstract class TopTermsRewrite<B> extends TermCollectingRewrite<B> {
|
|||
|
||||
for (final ScoreTerm st : scoreTerms) {
|
||||
final Term term = new Term(query.field, st.bytes.toBytesRef());
|
||||
addClause(b, term, st.termState.docFreq(), st.boost, st.termState); // add to query
|
||||
// We allow negative term scores (fuzzy query does this, for example) while collecting the terms,
|
||||
// but truncate such boosts to 0.0f when building the query:
|
||||
addClause(b, term, st.termState.docFreq(), Math.max(0.0f, st.boost), st.termState); // add to query
|
||||
}
|
||||
return build(b);
|
||||
}
|
||||
|
|
|
@ -49,15 +49,16 @@ public class FuzzyTermOnShortTermsTest extends LuceneTestCase {
|
|||
countHits(a, new String[]{"abcde"}, new FuzzyQuery(new Term(FIELD, "abc"), 2), 1);
|
||||
countHits(a, new String[]{"abc"}, new FuzzyQuery(new Term(FIELD, "abcde"), 2), 1);
|
||||
|
||||
//these don't
|
||||
countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "a"), 1), 0);
|
||||
countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "ab"), 1), 0);
|
||||
// LUCENE-7439: these now work as well:
|
||||
|
||||
countHits(a, new String[]{"abc"}, new FuzzyQuery(new Term(FIELD, "a"), 2), 0);
|
||||
countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "abc"), 2), 0);
|
||||
countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "a"), 1), 1);
|
||||
countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "ab"), 1), 1);
|
||||
|
||||
countHits(a, new String[]{"abcd"}, new FuzzyQuery(new Term(FIELD, "ab"), 2), 0);
|
||||
countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "abcd"), 2), 0);
|
||||
countHits(a, new String[]{"abc"}, new FuzzyQuery(new Term(FIELD, "a"), 2), 1);
|
||||
countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "abc"), 2), 1);
|
||||
|
||||
countHits(a, new String[]{"abcd"}, new FuzzyQuery(new Term(FIELD, "ab"), 2), 1);
|
||||
countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "abcd"), 2), 1);
|
||||
}
|
||||
|
||||
private void countHits(Analyzer analyzer, String[] docs, Query q, int expected) throws Exception {
|
||||
|
|
|
@ -543,14 +543,12 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
continue;
|
||||
}
|
||||
int ed = getDistance(term, queryTerm);
|
||||
if (Math.min(queryTerm.length(), term.length()) > ed) {
|
||||
float score = 1f - (float) ed / (float) Math.min(queryTerm.length(), term.length());
|
||||
while (ed < 3) {
|
||||
expected[ed].add(new TermAndScore(term, score));
|
||||
ed++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(int ed=0;ed<3;ed++) {
|
||||
Collections.sort(expected[ed]);
|
||||
|
|
|
@ -16,9 +16,11 @@
|
|||
*/
|
||||
package org.apache.lucene.sandbox.queries;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -472,8 +474,17 @@ public class TestSlowFuzzyQuery extends LuceneTestCase {
|
|||
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(2, hits.length);
|
||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
|
||||
|
||||
// We cannot expect a particular order since both hits have a 0.0 score:
|
||||
Set<String> actual = new HashSet<>();
|
||||
actual.add(searcher.doc(hits[0].doc).get("field"));
|
||||
actual.add(searcher.doc(hits[1].doc).get("field"));
|
||||
|
||||
Set<String> expected = new HashSet<>();
|
||||
expected.add("test");
|
||||
expected.add("foobar");
|
||||
|
||||
assertEquals(expected, actual);
|
||||
|
||||
reader.close();
|
||||
index.close();
|
||||
|
|
Loading…
Reference in New Issue