LANG-640: Add normalizeSpace to StringUtils

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@982844 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
James W. Carman 2010-08-06 01:08:57 +00:00
parent 50c1fdecb4
commit 823f2603a4
2 changed files with 67 additions and 0 deletions

View File

@ -20,6 +20,7 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
/**
* <p>Operations on {@link java.lang.String} that are
@ -156,6 +157,11 @@ public class StringUtils {
*/
private static final int PAD_LIMIT = 8192;
/**
 * A precompiled regex pattern (<code>\s+</code>) that matches a block of one
 * or more consecutive whitespace characters. Used by
 * {@link #normalizeSpace(String)} to replace each such block with a single space.
 */
private static final Pattern WHITESPACE_BLOCK = Pattern.compile("\\s+");
/**
* <p><code>StringUtils</code> instances should NOT be constructed in
* standard programming. Instead, the class should be used as
@ -6257,4 +6263,52 @@ public class StringUtils {
int strOffset = str.length() - suffix.length();
return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length());
}
/**
 * <p>Normalizes the whitespace of a String, similar to the XPath
 * <a href="http://www.w3.org/TR/xpath/#function-normalize-space">normalize-space</a>
 * function.</p>
 *
 * <p>Leading and trailing whitespace is first removed using
 * <code>{@link #trim(String)}</code>; each remaining sequence of one or more
 * whitespace characters is then replaced by a single space.</p>
 *
 * <p>The set of characters treated as whitespace differs from XML. In XML,
 * whitespace characters are those allowed by the
 * <a href="http://www.w3.org/TR/REC-xml/#NT-S">S</a> production, which is
 * <code>S ::= (#x20 | #x9 | #xD | #xA)+</code>. Java's regexp pattern
 * <code>\s</code> defines whitespace as <code>[ \t\n\x0B\f\r]</code>.</p>
 *
 * <p>For reference:</p>
 * <ul>
 *  <li><code>\x0B</code> = vertical tab</li>
 *  <li><code>\f</code> = <code>#xC</code> = form feed</li>
 *  <li><code>#x20</code> = space</li>
 *  <li><code>#x9</code> = <code>\t</code></li>
 *  <li><code>#xA</code> = <code>\n</code></li>
 *  <li><code>#xD</code> = <code>\r</code></li>
 * </ul>
 *
 * <p>The difference is that Java's whitespace includes vertical tab and form
 * feed, which this function will also normalize. Additionally,
 * <code>{@link #trim(String)}</code> removes control characters
 * (char &lt;= 32) from both ends of the String.</p>
 *
 * <pre>
 * StringUtils.normalizeSpace(null)  = null
 * StringUtils.normalizeSpace("")    = ""
 * StringUtils.normalizeSpace(" ")   = ""
 * StringUtils.normalizeSpace(" a ") = "a"
 * </pre>
 *
 * @see Pattern
 * @see #trim(String)
 * @see <a
 *  href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize-space</a>
 * @param str  the source String to normalize whitespace in, may be null
 * @return the String with whitespace normalized,
 *  <code>null</code> if null String input
 *
 * @since 3.0
 */
public static String normalizeSpace(String str) {
    // Null in, null out: nothing to normalize.
    if (str == null) {
        return null;
    }
    // Strip both ends first so no leading or trailing space survives the collapse.
    final String trimmed = trim(str);
    // Collapse every interior run of whitespace into exactly one space.
    return WHITESPACE_BLOCK.matcher(trimmed).replaceAll(" ");
}
}

View File

@ -1855,4 +1855,17 @@ public class StringUtilsTest extends TestCase {
assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"}));
}
/**
 * Checks <code>StringUtils.normalizeSpace(String)</code> against a table of
 * expected/input pairs: null input, whitespace-only inputs that normalize
 * to the empty string, and inputs with whitespace around and between words.
 */
public void testNormalizeSpace() {
    // Each row is { expected, input }; rows are checked in order.
    final String[][] cases = {
        {null, null},
        {"", ""},
        {"", " "},
        {"", "\t"},
        {"", "\n"},
        {"", "\u000B"},
        {"", "\f"},
        {"", "\r"},
        {"a", " a "},
        {"a b c", " a b c "},
        {"a b c", "a\t\f\r b\u000B c\n"}
    };
    for (int i = 0; i < cases.length; i++) {
        final String expected = cases[i][0];
        final String input = cases[i][1];
        assertEquals(expected, StringUtils.normalizeSpace(input));
    }
}
}