mirror of https://github.com/apache/lucene.git
LUCENE-7439: FuzzyQuery now matches all terms within the specified edit distance, even if they are short
This commit is contained in:
parent
bd9962aba6
commit
471f90cf82
|
@ -32,6 +32,9 @@ Bug Fixes
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
|
|
||||||
|
* LUCENE-7439: FuzzyQuery now matches all terms within the specified
|
||||||
|
edit distance, even if they are short terms (Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
|
@ -350,7 +350,7 @@ public class FuzzyTermsEnum extends TermsEnum {
|
||||||
final int codePointCount = UnicodeUtil.codePointCount(term);
|
final int codePointCount = UnicodeUtil.codePointCount(term);
|
||||||
final float similarity = 1.0f - ((float) ed / (float)
|
final float similarity = 1.0f - ((float) ed / (float)
|
||||||
(Math.min(codePointCount, termLength)));
|
(Math.min(codePointCount, termLength)));
|
||||||
if (similarity > minSimilarity) {
|
if (minSimilarity == 0 || similarity > minSimilarity) {
|
||||||
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
|
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
|
||||||
//System.out.println(" yes");
|
//System.out.println(" yes");
|
||||||
return AcceptStatus.YES;
|
return AcceptStatus.YES;
|
||||||
|
|
|
@ -160,7 +160,9 @@ public abstract class TopTermsRewrite<B> extends TermCollectingRewrite<B> {
|
||||||
|
|
||||||
for (final ScoreTerm st : scoreTerms) {
|
for (final ScoreTerm st : scoreTerms) {
|
||||||
final Term term = new Term(query.field, st.bytes.toBytesRef());
|
final Term term = new Term(query.field, st.bytes.toBytesRef());
|
||||||
addClause(b, term, st.termState.docFreq(), st.boost, st.termState); // add to query
|
// We allow negative term scores (fuzzy query does this, for example) while collecting the terms,
|
||||||
|
// but truncate such boosts to 0.0f when building the query:
|
||||||
|
addClause(b, term, st.termState.docFreq(), Math.max(0.0f, st.boost), st.termState); // add to query
|
||||||
}
|
}
|
||||||
return build(b);
|
return build(b);
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,15 +49,16 @@ public class FuzzyTermOnShortTermsTest extends LuceneTestCase {
|
||||||
countHits(a, new String[]{"abcde"}, new FuzzyQuery(new Term(FIELD, "abc"), 2), 1);
|
countHits(a, new String[]{"abcde"}, new FuzzyQuery(new Term(FIELD, "abc"), 2), 1);
|
||||||
countHits(a, new String[]{"abc"}, new FuzzyQuery(new Term(FIELD, "abcde"), 2), 1);
|
countHits(a, new String[]{"abc"}, new FuzzyQuery(new Term(FIELD, "abcde"), 2), 1);
|
||||||
|
|
||||||
//these don't
|
// LUCENE-7439: these now work as well:
|
||||||
countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "a"), 1), 0);
|
|
||||||
countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "ab"), 1), 0);
|
|
||||||
|
|
||||||
countHits(a, new String[]{"abc"}, new FuzzyQuery(new Term(FIELD, "a"), 2), 0);
|
countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "a"), 1), 1);
|
||||||
countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "abc"), 2), 0);
|
countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "ab"), 1), 1);
|
||||||
|
|
||||||
countHits(a, new String[]{"abcd"}, new FuzzyQuery(new Term(FIELD, "ab"), 2), 0);
|
countHits(a, new String[]{"abc"}, new FuzzyQuery(new Term(FIELD, "a"), 2), 1);
|
||||||
countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "abcd"), 2), 0);
|
countHits(a, new String[]{"a"}, new FuzzyQuery(new Term(FIELD, "abc"), 2), 1);
|
||||||
|
|
||||||
|
countHits(a, new String[]{"abcd"}, new FuzzyQuery(new Term(FIELD, "ab"), 2), 1);
|
||||||
|
countHits(a, new String[]{"ab"}, new FuzzyQuery(new Term(FIELD, "abcd"), 2), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void countHits(Analyzer analyzer, String[] docs, Query q, int expected) throws Exception {
|
private void countHits(Analyzer analyzer, String[] docs, Query q, int expected) throws Exception {
|
||||||
|
|
|
@ -543,14 +543,12 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int ed = getDistance(term, queryTerm);
|
int ed = getDistance(term, queryTerm);
|
||||||
if (Math.min(queryTerm.length(), term.length()) > ed) {
|
|
||||||
float score = 1f - (float) ed / (float) Math.min(queryTerm.length(), term.length());
|
float score = 1f - (float) ed / (float) Math.min(queryTerm.length(), term.length());
|
||||||
while (ed < 3) {
|
while (ed < 3) {
|
||||||
expected[ed].add(new TermAndScore(term, score));
|
expected[ed].add(new TermAndScore(term, score));
|
||||||
ed++;
|
ed++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
for(int ed=0;ed<3;ed++) {
|
for(int ed=0;ed<3;ed++) {
|
||||||
Collections.sort(expected[ed]);
|
Collections.sort(expected[ed]);
|
||||||
|
|
|
@ -16,9 +16,11 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.lucene.sandbox.queries;
|
package org.apache.lucene.sandbox.queries;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
@ -472,8 +474,17 @@ public class TestSlowFuzzyQuery extends LuceneTestCase {
|
||||||
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
|
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
|
||||||
hits = searcher.search(q, 10).scoreDocs;
|
hits = searcher.search(q, 10).scoreDocs;
|
||||||
assertEquals(2, hits.length);
|
assertEquals(2, hits.length);
|
||||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
|
||||||
assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
|
// We cannot expect a particular order since both hits have a 0.0 score:
|
||||||
|
Set<String> actual = new HashSet<>();
|
||||||
|
actual.add(searcher.doc(hits[0].doc).get("field"));
|
||||||
|
actual.add(searcher.doc(hits[1].doc).get("field"));
|
||||||
|
|
||||||
|
Set<String> expected = new HashSet<>();
|
||||||
|
expected.add("test");
|
||||||
|
expected.add("foobar");
|
||||||
|
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
|
||||||
reader.close();
|
reader.close();
|
||||||
index.close();
|
index.close();
|
||||||
|
|
Loading…
Reference in New Issue