LANG-640: Add normalizeSpace to StringUtils
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@982844 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
50c1fdecb4
commit
823f2603a4
|
@ -20,6 +20,7 @@ import java.util.ArrayList;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* <p>Operations on {@link java.lang.String} that are
|
||||
|
@ -156,6 +157,11 @@ public class StringUtils {
|
|||
*/
|
||||
private static final int PAD_LIMIT = 8192;
|
||||
|
||||
/**
|
||||
* A regex pattern for recognizing blocks of whitespace characters.
|
||||
*/
|
||||
// Compiled once here so normalizeSpace(String) does not re-compile the regex on each call.
private static final Pattern WHITESPACE_BLOCK = Pattern.compile("\\s+");
|
||||
|
||||
/**
|
||||
* <p><code>StringUtils</code> instances should NOT be constructed in
|
||||
* standard programming. Instead, the class should be used as
|
||||
|
@ -6257,4 +6263,52 @@ public class StringUtils {
|
|||
int strOffset = str.length() - suffix.length();
|
||||
return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length());
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Similar to <a
|
||||
* href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize
|
||||
* -space</a>
|
||||
* </p>
|
||||
* <p>
|
||||
* The function returns the argument string with whitespace normalized by using
|
||||
* <code>{@link #trim(String)}</code> to remove leading and trailing whitespace
|
||||
* and then replacing sequences of whitespace characters by a single space.
|
||||
* </p>
|
||||
* In XML, whitespace characters are the same as those allowed by the <a
|
||||
* href="http://www.w3.org/TR/REC-xml/#NT-S">S</a> production, which is S ::= (#x20 | #x9 | #xD | #xA)+
|
||||
* <p>
|
||||
* Java's regexp pattern \s defines whitespace as [ \t\n\x0B\f\r]
|
||||
* <p>
|
||||
* For reference:
|
||||
* <ul>
|
||||
* <li>\x0B = vertical tab</li>
|
||||
* <li>\f = #xC = form feed</li>
|
||||
* <li>#x20 = space</li>
|
||||
* <li>#x9 = \t</li>
|
||||
* <li>#xA = \n</li>
|
||||
* <li>#xD = \r</li>
|
||||
* </ul>
|
||||
* </p>
|
||||
* <p>
|
||||
* The difference is that Java's whitespace includes vertical tab and form feed, which this function will also
|
||||
* normalize. Additionally <code>{@link #trim(String)}</code> removes control characters (char &lt;= 32) from both
|
||||
* ends of this String.
|
||||
* </p>
|
||||
*
|
||||
* @see Pattern
|
||||
* @see #trim(String)
|
||||
* @see <a
|
||||
* href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize-space</a>
|
||||
* @param str the source String to normalize whitespaces from, may be null
|
||||
* @return the modified string with whitespace normalized, <code>null</code> if null String input
|
||||
*
|
||||
* @since 3.0
|
||||
*/
|
||||
public static String normalizeSpace(String str) {
|
||||
if(str == null) {
|
||||
return null;
|
||||
}
|
||||
return WHITESPACE_BLOCK.matcher(trim(str)).replaceAll(" ");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1855,4 +1855,17 @@ public class StringUtilsTest extends TestCase {
|
|||
assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"}));
|
||||
}
|
||||
|
||||
public void testNormalizeSpace() {
|
||||
assertEquals(null, StringUtils.normalizeSpace(null));
|
||||
assertEquals("", StringUtils.normalizeSpace(""));
|
||||
assertEquals("", StringUtils.normalizeSpace(" "));
|
||||
assertEquals("", StringUtils.normalizeSpace("\t"));
|
||||
assertEquals("", StringUtils.normalizeSpace("\n"));
|
||||
assertEquals("", StringUtils.normalizeSpace("\u000B"));
|
||||
assertEquals("", StringUtils.normalizeSpace("\f"));
|
||||
assertEquals("", StringUtils.normalizeSpace("\r"));
|
||||
assertEquals("a", StringUtils.normalizeSpace(" a "));
|
||||
assertEquals("a b c", StringUtils.normalizeSpace(" a b c "));
|
||||
assertEquals("a b c", StringUtils.normalizeSpace("a\t\f\r b\u000B c\n"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue