From afedbae8aca69a092d65766640361801bf2b5d9b Mon Sep 17 00:00:00 2001 From: pascalschumacher Date: Fri, 20 May 2016 20:04:30 +0200 Subject: [PATCH] LANG-1206: Improve CharSetUtils.squeeze() performance (closes #147) patch by Mohammed Alfallaj --- src/changes/changes.xml | 1 + .../apache/commons/lang3/CharSetUtils.java | 23 +++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index bd03520d3..a03b992f7 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -22,6 +22,7 @@ + Improve CharSetUtils.squeeze() performance Add RandomStringUtils#randomGraph and #randomPrint which match corresponding regular expression class StringUtils#startsWithAny/endsWithAny is case sensitive - documented as case insensitive Add StopWatch#getTime(TimeUnit) diff --git a/src/main/java/org/apache/commons/lang3/CharSetUtils.java b/src/main/java/org/apache/commons/lang3/CharSetUtils.java index 859967410..67d6feaf2 100644 --- a/src/main/java/org/apache/commons/lang3/CharSetUtils.java +++ b/src/main/java/org/apache/commons/lang3/CharSetUtils.java @@ -68,13 +68,26 @@ public class CharSetUtils { final StringBuilder buffer = new StringBuilder(str.length()); final char[] chrs = str.toCharArray(); final int sz = chrs.length; - char lastChar = ' '; + char lastChar = chrs[0]; char ch = ' '; - for (int i = 0; i < sz; i++) { + Character inChars = null; + Character notInChars = null; + buffer.append(lastChar); + for (int i = 1; i < sz; i++) { ch = chrs[i]; - // Compare with contains() last for performance. - if (ch == lastChar && i != 0 && chars.contains(ch)) { - continue; + if (ch == lastChar) { + if ((inChars != null) && (ch == inChars)) { + continue; + } else { + if ((notInChars == null) || (ch != notInChars)) { + if (chars.contains(ch)) { + inChars = ch; + continue; + } else { + notInChars = ch; + } + } + } } buffer.append(ch); lastChar = ch;