LUCENE-1548: fix distance normalization in LevenshteinDistance to not produce negative distances

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@748534 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-02-27 14:07:12 +00:00
parent 1afef18d4a
commit 6248e14515
4 changed files with 22 additions and 15 deletions

View File

@ -25,6 +25,9 @@ Bug fixes
4. LUCENE-1514: ShingleMatrixFilter#next(Token) easily throws a StackOverflowError
due to recursive invocation. (Karl Wettin)
5. LUCENE-1548: Fix distance normalization in LevenshteinDistance to
divide by the length of the longer string rather than the shorter one,
so that it no longer produces negative distances
(Thomas Morton via Mike McCandless)
New features
1. LUCENE-1470: Added TrieRangeQuery, a much faster implementation of

View File

@ -100,7 +100,7 @@ public final class LevensteinDistance implements StringDistance {
// our last action in the above loop was to switch d and p, so p now
// actually has the most recent cost counts
return 1.0f - ((float) p[n] / Math.min(other.length(), sa.length));
return 1.0f - ((float) p[n] / Math.max(other.length(), sa.length));
}
}

View File

@ -25,25 +25,25 @@ public class TestLevenshteinDistance extends TestCase {
public void testGetDistance() {
float d = sd.getDistance("al", "al");
assertTrue(d == 1.0f);
assertEquals(d,1.0f,0.001);
d = sd.getDistance("martha", "marhta");
assertTrue(d > 0.66 && d <0.67);
assertEquals(d,0.6666,0.001);
d = sd.getDistance("jones", "johnson");
assertTrue(d > 0.199 && d < 0.201);
assertEquals(d,0.4285,0.001);
d = sd.getDistance("abcvwxyz", "cabvwxyz");
assertTrue(d > 0.749 && d < 0.751);
assertEquals(d,0.75,0.001);
d = sd.getDistance("dwayne", "duane");
assertTrue(d > 0.599 && d < 0.601);
assertEquals(d,0.666,0.001);
d = sd.getDistance("dixon", "dicksonx");
assertTrue(d > 0.199 && d < 0.201);
assertEquals(d,0.5,0.001);
d = sd.getDistance("six", "ten");
assertTrue(d == 0f);
assertEquals(d,0,0.001);
float d1 = sd.getDistance("zac ephron", "zac efron");
float d2 = sd.getDistance("zac ephron", "kai ephron");
assertTrue(d1 < d2);
assertEquals(d1,d2,0.001);
d1 = sd.getDistance("brittney spears", "britney spears");
d2 = sd.getDistance("brittney spears", "brittney startzman");
assertTrue(d1 > d2);
assertTrue(d1 > d2);
}
}

View File

@ -129,20 +129,23 @@ public class TestSpellChecker extends TestCase {
assertEquals(similar[0], "five");
similar = spellChecker.suggestSimilar("ive", 2);
assertEquals(1, similar.length);
assertEquals(2, similar.length);
assertEquals(similar[0], "five");
assertEquals(similar[1], "nine");
similar = spellChecker.suggestSimilar("fives", 2);
assertEquals(1, similar.length);
assertEquals(similar[0], "five");
similar = spellChecker.suggestSimilar("fie", 2);
assertEquals(2, similar.length);
assertEquals(similar[0], "five");
assertEquals(similar[1], "nine");
similar = spellChecker.suggestSimilar("fi", 2);
assertEquals(1, similar.length);
assertEquals(similar[0], "five");
similar = spellChecker.suggestSimilar("fi", 2);
assertEquals(0, similar.length);
// test restraint to a field
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
assertEquals(0, similar.length); // there isn't the term thousand in the field field1
@ -151,8 +154,9 @@ public class TestSpellChecker extends TestCase {
assertEquals(1, similar.length); // there is the term thousand in the field field2
similar = spellChecker.suggestSimilar("onety", 2);
assertEquals(1, similar.length);
assertEquals(2, similar.length);
assertEquals(similar[0], "ninety");
assertEquals(similar[1], "one");
try {
similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
} catch (NullPointerException e) {