git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1136516 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Matthew Jason Benson 2011-06-16 16:11:20 +00:00
parent a6f4e95950
commit f1b8ec1888
1 changed file with 31 additions and 31 deletions

View File

@ -6068,8 +6068,8 @@ public class StringUtils {
/* /*
The difference between this impl. and the previous is that, rather The difference between this impl. and the previous is that, rather
than creating and retaining a matrix of size s.length()+1 by t.length()+1, than creating and retaining a matrix of size s.length() + 1 by t.length() + 1,
we maintain two single-dimensional arrays of length s.length()+1. The first, d, we maintain two single-dimensional arrays of length s.length() + 1. The first, d,
is the 'current working' distance array that maintains the newest distance cost is the 'current working' distance array that maintains the newest distance cost
counts as we iterate through the characters of String s. Each time we increment counts as we iterate through the characters of String s. Each time we increment
the index of String t we are comparing, d is copied to p, the second int[]. Doing so the index of String t we are comparing, d is copied to p, the second int[]. Doing so
@ -6101,8 +6101,8 @@ public class StringUtils {
m = t.length(); m = t.length();
} }
int p[] = new int[n+1]; //'previous' cost array, horizontally int p[] = new int[n + 1]; //'previous' cost array, horizontally
int d[] = new int[n+1]; // cost array, horizontally int d[] = new int[n + 1]; // cost array, horizontally
int _d[]; //placeholder to assist in swapping p and d int _d[]; //placeholder to assist in swapping p and d
// indexes into strings s and t // indexes into strings s and t
@ -6113,18 +6113,18 @@ public class StringUtils {
int cost; // cost int cost; // cost
for (i = 0; i<=n; i++) { for (i = 0; i <= n; i++) {
p[i] = i; p[i] = i;
} }
for (j = 1; j<=m; j++) { for (j = 1; j <= m; j++) {
t_j = t.charAt(j-1); t_j = t.charAt(j - 1);
d[0] = j; d[0] = j;
for (i=1; i<=n; i++) { for (i = 1; i <= n; i++) {
cost = s.charAt(i-1)==t_j ? 0 : 1; cost = s.charAt(i - 1) == t_j ? 0 : 1;
// minimum of cell to the left+1, to the top+1, diagonally left and up +cost // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
d[i] = Math.min(Math.min(d[i-1]+1, p[i]+1), p[i-1]+cost); d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost);
} }
// copy current distance counts to 'previous row' distance counts // copy current distance counts to 'previous row' distance counts
@ -6171,10 +6171,10 @@ public class StringUtils {
* @throws IllegalArgumentException if either String input is {@code null} or the threshold is negative * @throws IllegalArgumentException if either String input is {@code null} or the threshold is negative
*/ */
public static int getLevenshteinDistance(CharSequence s, CharSequence t, int threshold) { public static int getLevenshteinDistance(CharSequence s, CharSequence t, int threshold) {
if(s == null || t == null) { if (s == null || t == null) {
throw new IllegalArgumentException("Strings must not be null"); throw new IllegalArgumentException("Strings must not be null");
} }
if(threshold < 0) { if (threshold < 0) {
throw new IllegalArgumentException("Threshold must not be negative"); throw new IllegalArgumentException("Threshold must not be negative");
} }
@ -6182,7 +6182,7 @@ public class StringUtils {
This implementation only computes the distance if it's less than or equal to the This implementation only computes the distance if it's less than or equal to the
threshold value, returning -1 if it's greater. The advantage is performance: unbounded threshold value, returning -1 if it's greater. The advantage is performance: unbounded
distance is O(nm), but a bound of k allows us to reduce it to O(km) time by only distance is O(nm), but a bound of k allows us to reduce it to O(km) time by only
computing a diagonal stripe of width 2k+1 of the cost table. computing a diagonal stripe of width 2k + 1 of the cost table.
It is also possible to use this to compute the unbounded Levenshtein distance by starting It is also possible to use this to compute the unbounded Levenshtein distance by starting
the threshold at 1 and doubling each time until the distance is found; this is O(dm), where the threshold at 1 and doubling each time until the distance is found; this is O(dm), where
d is the distance. d is the distance.
@ -6226,13 +6226,13 @@ public class StringUtils {
int m = t.length(); // length of t int m = t.length(); // length of t
// if one string is empty, the edit distance is necessarily the length of the other // if one string is empty, the edit distance is necessarily the length of the other
if(n == 0) { if (n == 0) {
return m <= threshold? m : -1; return m <= threshold ? m : -1;
} else if(m == 0) { } else if (m == 0) {
return n <= threshold? n : -1; return n <= threshold ? n : -1;
} }
if(n > m) { if (n > m) {
// swap the two strings to consume less memory // swap the two strings to consume less memory
CharSequence tmp = s; CharSequence tmp = s;
s = t; s = t;
@ -6241,13 +6241,13 @@ public class StringUtils {
m = t.length(); m = t.length();
} }
int p[] = new int[n+1]; // 'previous' cost array, horizontally int p[] = new int[n + 1]; // 'previous' cost array, horizontally
int d[] = new int[n+1]; // cost array, horizontally int d[] = new int[n + 1]; // cost array, horizontally
int _d[]; // placeholder to assist in swapping p and d int _d[]; // placeholder to assist in swapping p and d
// fill in starting table values // fill in starting table values
int boundary = Math.min(n, threshold) + 1; int boundary = Math.min(n, threshold) + 1;
for(int i = 0; i < boundary; i++) { for (int i = 0; i < boundary; i++) {
p[i] = i; p[i] = i;
} }
// these fills ensure that the value above the rightmost entry of our // these fills ensure that the value above the rightmost entry of our
@ -6256,8 +6256,8 @@ public class StringUtils {
Arrays.fill(d, Integer.MAX_VALUE); Arrays.fill(d, Integer.MAX_VALUE);
// iterates through t // iterates through t
for(int j = 1; j <= m; j++) { for (int j = 1; j <= m; j++) {
char t_j = t.charAt(j-1); // jth character of t char t_j = t.charAt(j - 1); // jth character of t
d[0] = j; d[0] = j;
// compute stripe indices, constrain to array size // compute stripe indices, constrain to array size
@ -6265,23 +6265,23 @@ public class StringUtils {
int max = Math.min(n, j + threshold); int max = Math.min(n, j + threshold);
// the stripe may lead off of the table if s and t are of different sizes // the stripe may lead off of the table if s and t are of different sizes
if(min > max) { if (min > max) {
return -1; return -1;
} }
// ignore entry left of leftmost // ignore entry left of leftmost
if(min > 1) { if (min > 1) {
d[min-1] = Integer.MAX_VALUE; d[min - 1] = Integer.MAX_VALUE;
} }
// iterates through [min, max] in s // iterates through [min, max] in s
for(int i = min; i <= max; i++) { for (int i = min; i <= max; i++) {
if(s.charAt(i-1) == t_j) { if (s.charAt(i - 1) == t_j) {
// diagonally left and up // diagonally left and up
d[i] = p[i-1]; d[i] = p[i - 1];
} else { } else {
// 1 + minimum of cell to the left, to the top, diagonally left and up // 1 + minimum of cell to the left, to the top, diagonally left and up
d[i] = 1 + Math.min(Math.min(d[i-1], p[i]), p[i-1]); d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]);
} }
} }
@ -6293,7 +6293,7 @@ public class StringUtils {
// if p[n] is greater than the threshold, there's no guarantee on it being the correct // if p[n] is greater than the threshold, there's no guarantee on it being the correct
// distance // distance
if(p[n] <= threshold) { if (p[n] <= threshold) {
return p[n]; return p[n];
} else { } else {
return -1; return -1;