mirror of https://github.com/apache/lucene.git
LUCENE-1548: fix distance normalization in LevensteinDistance so that it no longer produces negative distances
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@748534 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1afef18d4a
commit
6248e14515
|
@ -25,6 +25,9 @@ Bug fixes
|
|||
4. LUCENE-1514: ShingleMatrixFilter#next(Token) easily throws a StackOverflowError
|
||||
due to recursive invocation. (Karl Wettin)
|
||||
|
||||
5. LUCENE-1548: Fix distance normalization in LevensteinDistance to
|
||||
not produce negative distances (Thomas Morton via Mike McCandless)
|
||||
|
||||
New features
|
||||
|
||||
1. LUCENE-1470: Added TrieRangeQuery, a much faster implementation of
|
||||
|
|
|
@ -100,7 +100,7 @@ public final class LevensteinDistance implements StringDistance {
|
|||
|
||||
// our last action in the above loop was to switch d and p, so p now
|
||||
// actually has the most recent cost counts
|
||||
return 1.0f - ((float) p[n] / Math.min(other.length(), sa.length));
|
||||
return 1.0f - ((float) p[n] / Math.max(other.length(), sa.length));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -25,25 +25,25 @@ public class TestLevenshteinDistance extends TestCase {
|
|||
|
||||
public void testGetDistance() {
|
||||
float d = sd.getDistance("al", "al");
|
||||
assertTrue(d == 1.0f);
|
||||
assertEquals(d,1.0f,0.001);
|
||||
d = sd.getDistance("martha", "marhta");
|
||||
assertTrue(d > 0.66 && d <0.67);
|
||||
assertEquals(d,0.6666,0.001);
|
||||
d = sd.getDistance("jones", "johnson");
|
||||
assertTrue(d > 0.199 && d < 0.201);
|
||||
assertEquals(d,0.4285,0.001);
|
||||
d = sd.getDistance("abcvwxyz", "cabvwxyz");
|
||||
assertTrue(d > 0.749 && d < 0.751);
|
||||
assertEquals(d,0.75,0.001);
|
||||
d = sd.getDistance("dwayne", "duane");
|
||||
assertTrue(d > 0.599 && d < 0.601);
|
||||
assertEquals(d,0.666,0.001);
|
||||
d = sd.getDistance("dixon", "dicksonx");
|
||||
assertTrue(d > 0.199 && d < 0.201);
|
||||
assertEquals(d,0.5,0.001);
|
||||
d = sd.getDistance("six", "ten");
|
||||
assertTrue(d == 0f);
|
||||
assertEquals(d,0,0.001);
|
||||
float d1 = sd.getDistance("zac ephron", "zac efron");
|
||||
float d2 = sd.getDistance("zac ephron", "kai ephron");
|
||||
assertTrue(d1 < d2);
|
||||
assertEquals(d1,d2,0.001);
|
||||
d1 = sd.getDistance("brittney spears", "britney spears");
|
||||
d2 = sd.getDistance("brittney spears", "brittney startzman");
|
||||
assertTrue(d1 > d2);
|
||||
assertTrue(d1 > d2);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -129,20 +129,23 @@ public class TestSpellChecker extends TestCase {
|
|||
assertEquals(similar[0], "five");
|
||||
|
||||
similar = spellChecker.suggestSimilar("ive", 2);
|
||||
assertEquals(1, similar.length);
|
||||
assertEquals(2, similar.length);
|
||||
assertEquals(similar[0], "five");
|
||||
assertEquals(similar[1], "nine");
|
||||
|
||||
similar = spellChecker.suggestSimilar("fives", 2);
|
||||
assertEquals(1, similar.length);
|
||||
assertEquals(similar[0], "five");
|
||||
|
||||
similar = spellChecker.suggestSimilar("fie", 2);
|
||||
assertEquals(2, similar.length);
|
||||
assertEquals(similar[0], "five");
|
||||
assertEquals(similar[1], "nine");
|
||||
|
||||
similar = spellChecker.suggestSimilar("fi", 2);
|
||||
assertEquals(1, similar.length);
|
||||
assertEquals(similar[0], "five");
|
||||
|
||||
similar = spellChecker.suggestSimilar("fi", 2);
|
||||
assertEquals(0, similar.length);
|
||||
|
||||
// test restraint to a field
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
|
||||
assertEquals(0, similar.length); // there isn't the term thousand in the field field1
|
||||
|
@ -151,8 +154,9 @@ public class TestSpellChecker extends TestCase {
|
|||
assertEquals(1, similar.length); // there is the term thousand in the field field2
|
||||
|
||||
similar = spellChecker.suggestSimilar("onety", 2);
|
||||
assertEquals(1, similar.length);
|
||||
assertEquals(2, similar.length);
|
||||
assertEquals(similar[0], "ninety");
|
||||
assertEquals(similar[1], "one");
|
||||
try {
|
||||
similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
|
||||
} catch (NullPointerException e) {
|
||||
|
|
Loading…
Reference in New Issue