LUCENE-1548: fix distance normalization in LevenshteinDistance to not produce negative distances

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@748534 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-02-27 14:07:12 +00:00
parent 1afef18d4a
commit 6248e14515
4 changed files with 22 additions and 15 deletions

View File

@@ -25,6 +25,9 @@ Bug fixes
4. LUCENE-1514: ShingleMatrixFilter#next(Token) easily throws a StackOverflowException 4. LUCENE-1514: ShingleMatrixFilter#next(Token) easily throws a StackOverflowException
due to recursive invocation. (Karl Wettin) due to recursive invocation. (Karl Wettin)
5. LUCENE-1548: Fix distance normalization in LevenshteinDistance to
not produce negative distances (Thomas Morton via Mike McCandless)
New features New features
1. LUCENE-1470: Added TrieRangeQuery, a much faster implementation of 1. LUCENE-1470: Added TrieRangeQuery, a much faster implementation of

View File

@@ -100,7 +100,7 @@ public final class LevensteinDistance implements StringDistance {
// our last action in the above loop was to switch d and p, so p now // our last action in the above loop was to switch d and p, so p now
// actually has the most recent cost counts // actually has the most recent cost counts
return 1.0f - ((float) p[n] / Math.min(other.length(), sa.length)); return 1.0f - ((float) p[n] / Math.max(other.length(), sa.length));
} }
} }

View File

@@ -25,22 +25,22 @@ public class TestLevenshteinDistance extends TestCase {
public void testGetDistance() { public void testGetDistance() {
float d = sd.getDistance("al", "al"); float d = sd.getDistance("al", "al");
assertTrue(d == 1.0f); assertEquals(d,1.0f,0.001);
d = sd.getDistance("martha", "marhta"); d = sd.getDistance("martha", "marhta");
assertTrue(d > 0.66 && d <0.67); assertEquals(d,0.6666,0.001);
d = sd.getDistance("jones", "johnson"); d = sd.getDistance("jones", "johnson");
assertTrue(d > 0.199 && d < 0.201); assertEquals(d,0.4285,0.001);
d = sd.getDistance("abcvwxyz", "cabvwxyz"); d = sd.getDistance("abcvwxyz", "cabvwxyz");
assertTrue(d > 0.749 && d < 0.751); assertEquals(d,0.75,0.001);
d = sd.getDistance("dwayne", "duane"); d = sd.getDistance("dwayne", "duane");
assertTrue(d > 0.599 && d < 0.601); assertEquals(d,0.666,0.001);
d = sd.getDistance("dixon", "dicksonx"); d = sd.getDistance("dixon", "dicksonx");
assertTrue(d > 0.199 && d < 0.201); assertEquals(d,0.5,0.001);
d = sd.getDistance("six", "ten"); d = sd.getDistance("six", "ten");
assertTrue(d == 0f); assertEquals(d,0,0.001);
float d1 = sd.getDistance("zac ephron", "zac efron"); float d1 = sd.getDistance("zac ephron", "zac efron");
float d2 = sd.getDistance("zac ephron", "kai ephron"); float d2 = sd.getDistance("zac ephron", "kai ephron");
assertTrue(d1 < d2); assertEquals(d1,d2,0.001);
d1 = sd.getDistance("brittney spears", "britney spears"); d1 = sd.getDistance("brittney spears", "britney spears");
d2 = sd.getDistance("brittney spears", "brittney startzman"); d2 = sd.getDistance("brittney spears", "brittney startzman");
assertTrue(d1 > d2); assertTrue(d1 > d2);

View File

@@ -129,19 +129,22 @@ public class TestSpellChecker extends TestCase {
assertEquals(similar[0], "five"); assertEquals(similar[0], "five");
similar = spellChecker.suggestSimilar("ive", 2); similar = spellChecker.suggestSimilar("ive", 2);
assertEquals(1, similar.length); assertEquals(2, similar.length);
assertEquals(similar[0], "five"); assertEquals(similar[0], "five");
assertEquals(similar[1], "nine");
similar = spellChecker.suggestSimilar("fives", 2); similar = spellChecker.suggestSimilar("fives", 2);
assertEquals(1, similar.length); assertEquals(1, similar.length);
assertEquals(similar[0], "five"); assertEquals(similar[0], "five");
similar = spellChecker.suggestSimilar("fie", 2); similar = spellChecker.suggestSimilar("fie", 2);
assertEquals(1, similar.length); assertEquals(2, similar.length);
assertEquals(similar[0], "five"); assertEquals(similar[0], "five");
assertEquals(similar[1], "nine");
similar = spellChecker.suggestSimilar("fi", 2); similar = spellChecker.suggestSimilar("fi", 2);
assertEquals(0, similar.length); assertEquals(1, similar.length);
assertEquals(similar[0], "five");
// test restraint to a field // test restraint to a field
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false); similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
@@ -151,8 +154,9 @@ public class TestSpellChecker extends TestCase {
assertEquals(1, similar.length); // there is the term thousand in the field field2 assertEquals(1, similar.length); // there is the term thousand in the field field2
similar = spellChecker.suggestSimilar("onety", 2); similar = spellChecker.suggestSimilar("onety", 2);
assertEquals(1, similar.length); assertEquals(2, similar.length);
assertEquals(similar[0], "ninety"); assertEquals(similar[0], "ninety");
assertEquals(similar[1], "one");
try { try {
similar = spellChecker.suggestSimilar("tousand", 10, r, null, false); similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
} catch (NullPointerException e) { } catch (NullPointerException e) {