From 04b41e2c97fe5faaf521c2e0581c30e18e2f0c27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20J=C3=B6nsson?= Date: Thu, 26 Nov 2015 10:23:15 +0100 Subject: [PATCH] LANG-1234: getLevenshteinDistance with a threshold: optimize implementation if the string lengths differ by more than the threshold (closes #118) If the string lengths differ by more than the threshold, the distance is at least that difference and therefore exceeds the threshold, so there's no need for the algorithm to begin allocating arrays etc. --- src/main/java/org/apache/commons/lang3/StringUtils.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/org/apache/commons/lang3/StringUtils.java b/src/main/java/org/apache/commons/lang3/StringUtils.java index 31a572a54..c169e0a62 100644 --- a/src/main/java/org/apache/commons/lang3/StringUtils.java +++ b/src/main/java/org/apache/commons/lang3/StringUtils.java @@ -7585,6 +7585,10 @@ distance is O(nm), but a bound of k allows us to reduce it to O(km) time by only } else if (m == 0) { return n <= threshold ? n : -1; } + // no need to calculate the distance if the length difference is greater than the threshold + else if (Math.abs(n - m) > threshold) { + return -1; + } if (n > m) { // swap the two strings to consume less memory