mirror of https://github.com/apache/lucene.git
LUCENE-1548: fix distance normalization in LevenshteinDistance to not produce negative distances
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@748534 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1afef18d4a
commit
6248e14515
|
@ -25,6 +25,9 @@ Bug fixes
|
||||||
4. LUCENE-1514: ShingleMatrixFilter#next(Token) easily throws a StackOverflowException
|
4. LUCENE-1514: ShingleMatrixFilter#next(Token) easily throws a StackOverflowException
|
||||||
due to recursive invocation. (Karl Wettin)
|
due to recursive invocation. (Karl Wettin)
|
||||||
|
|
||||||
|
5. LUCENE-1548: Fix distance normalization in LevenshteinDistance to
|
||||||
|
not produce negative distances (Thomas Morton via Mike McCandless)
|
||||||
|
|
||||||
New features
|
New features
|
||||||
|
|
||||||
1. LUCENE-1470: Added TrieRangeQuery, a much faster implementation of
|
1. LUCENE-1470: Added TrieRangeQuery, a much faster implementation of
|
||||||
|
|
|
@ -100,7 +100,7 @@ public final class LevensteinDistance implements StringDistance {
|
||||||
|
|
||||||
// our last action in the above loop was to switch d and p, so p now
|
// our last action in the above loop was to switch d and p, so p now
|
||||||
// actually has the most recent cost counts
|
// actually has the most recent cost counts
|
||||||
return 1.0f - ((float) p[n] / Math.min(other.length(), sa.length));
|
return 1.0f - ((float) p[n] / Math.max(other.length(), sa.length));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,22 +25,22 @@ public class TestLevenshteinDistance extends TestCase {
|
||||||
|
|
||||||
public void testGetDistance() {
|
public void testGetDistance() {
|
||||||
float d = sd.getDistance("al", "al");
|
float d = sd.getDistance("al", "al");
|
||||||
assertTrue(d == 1.0f);
|
assertEquals(d,1.0f,0.001);
|
||||||
d = sd.getDistance("martha", "marhta");
|
d = sd.getDistance("martha", "marhta");
|
||||||
assertTrue(d > 0.66 && d <0.67);
|
assertEquals(d,0.6666,0.001);
|
||||||
d = sd.getDistance("jones", "johnson");
|
d = sd.getDistance("jones", "johnson");
|
||||||
assertTrue(d > 0.199 && d < 0.201);
|
assertEquals(d,0.4285,0.001);
|
||||||
d = sd.getDistance("abcvwxyz", "cabvwxyz");
|
d = sd.getDistance("abcvwxyz", "cabvwxyz");
|
||||||
assertTrue(d > 0.749 && d < 0.751);
|
assertEquals(d,0.75,0.001);
|
||||||
d = sd.getDistance("dwayne", "duane");
|
d = sd.getDistance("dwayne", "duane");
|
||||||
assertTrue(d > 0.599 && d < 0.601);
|
assertEquals(d,0.666,0.001);
|
||||||
d = sd.getDistance("dixon", "dicksonx");
|
d = sd.getDistance("dixon", "dicksonx");
|
||||||
assertTrue(d > 0.199 && d < 0.201);
|
assertEquals(d,0.5,0.001);
|
||||||
d = sd.getDistance("six", "ten");
|
d = sd.getDistance("six", "ten");
|
||||||
assertTrue(d == 0f);
|
assertEquals(d,0,0.001);
|
||||||
float d1 = sd.getDistance("zac ephron", "zac efron");
|
float d1 = sd.getDistance("zac ephron", "zac efron");
|
||||||
float d2 = sd.getDistance("zac ephron", "kai ephron");
|
float d2 = sd.getDistance("zac ephron", "kai ephron");
|
||||||
assertTrue(d1 < d2);
|
assertEquals(d1,d2,0.001);
|
||||||
d1 = sd.getDistance("brittney spears", "britney spears");
|
d1 = sd.getDistance("brittney spears", "britney spears");
|
||||||
d2 = sd.getDistance("brittney spears", "brittney startzman");
|
d2 = sd.getDistance("brittney spears", "brittney startzman");
|
||||||
assertTrue(d1 > d2);
|
assertTrue(d1 > d2);
|
||||||
|
|
|
@ -129,19 +129,22 @@ public class TestSpellChecker extends TestCase {
|
||||||
assertEquals(similar[0], "five");
|
assertEquals(similar[0], "five");
|
||||||
|
|
||||||
similar = spellChecker.suggestSimilar("ive", 2);
|
similar = spellChecker.suggestSimilar("ive", 2);
|
||||||
assertEquals(1, similar.length);
|
assertEquals(2, similar.length);
|
||||||
assertEquals(similar[0], "five");
|
assertEquals(similar[0], "five");
|
||||||
|
assertEquals(similar[1], "nine");
|
||||||
|
|
||||||
similar = spellChecker.suggestSimilar("fives", 2);
|
similar = spellChecker.suggestSimilar("fives", 2);
|
||||||
assertEquals(1, similar.length);
|
assertEquals(1, similar.length);
|
||||||
assertEquals(similar[0], "five");
|
assertEquals(similar[0], "five");
|
||||||
|
|
||||||
similar = spellChecker.suggestSimilar("fie", 2);
|
similar = spellChecker.suggestSimilar("fie", 2);
|
||||||
assertEquals(1, similar.length);
|
assertEquals(2, similar.length);
|
||||||
assertEquals(similar[0], "five");
|
assertEquals(similar[0], "five");
|
||||||
|
assertEquals(similar[1], "nine");
|
||||||
|
|
||||||
similar = spellChecker.suggestSimilar("fi", 2);
|
similar = spellChecker.suggestSimilar("fi", 2);
|
||||||
assertEquals(0, similar.length);
|
assertEquals(1, similar.length);
|
||||||
|
assertEquals(similar[0], "five");
|
||||||
|
|
||||||
// test restraint to a field
|
// test restraint to a field
|
||||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
|
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
|
||||||
|
@ -151,8 +154,9 @@ public class TestSpellChecker extends TestCase {
|
||||||
assertEquals(1, similar.length); // there is the term thousand in the field field2
|
assertEquals(1, similar.length); // there is the term thousand in the field field2
|
||||||
|
|
||||||
similar = spellChecker.suggestSimilar("onety", 2);
|
similar = spellChecker.suggestSimilar("onety", 2);
|
||||||
assertEquals(1, similar.length);
|
assertEquals(2, similar.length);
|
||||||
assertEquals(similar[0], "ninety");
|
assertEquals(similar[0], "ninety");
|
||||||
|
assertEquals(similar[1], "one");
|
||||||
try {
|
try {
|
||||||
similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
|
similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
|
||||||
} catch (NullPointerException e) {
|
} catch (NullPointerException e) {
|
||||||
|
|
Loading…
Reference in New Issue