LANG-640: Add normalizeSpace to StringUtils
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@982844 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
50c1fdecb4
commit
823f2603a4
|
@ -20,6 +20,7 @@
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Operations on {@link java.lang.String} that are
|
* <p>Operations on {@link java.lang.String} that are
|
||||||
|
@ -156,6 +157,11 @@ public class StringUtils {
|
||||||
*/
|
*/
|
||||||
private static final int PAD_LIMIT = 8192;
|
private static final int PAD_LIMIT = 8192;
|
||||||
|
|
||||||
|
/**
 * A regex pattern for recognizing blocks of whitespace characters.
 *
 * <p>The expression is <code>\s+</code>, i.e. one or more consecutive
 * characters of the regex whitespace class. It is compiled once, when the
 * class is initialized, and is used by {@link #normalizeSpace(String)} to
 * collapse each such block into a single space.</p>
 */
|
||||||
|
private static final Pattern WHITESPACE_BLOCK = Pattern.compile("\\s+");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p><code>StringUtils</code> instances should NOT be constructed in
|
* <p><code>StringUtils</code> instances should NOT be constructed in
|
||||||
* standard programming. Instead, the class should be used as
|
* standard programming. Instead, the class should be used as
|
||||||
|
@ -6257,4 +6263,52 @@ private static boolean endsWith(String str, String suffix, boolean ignoreCase) {
|
||||||
int strOffset = str.length() - suffix.length();
|
int strOffset = str.length() - suffix.length();
|
||||||
return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length());
|
return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>
|
||||||
|
* Similar to <a
|
||||||
|
* href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize
|
||||||
|
* -space</a>
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* The function returns the argument string with whitespace normalized by using
|
||||||
|
* <code>{@link #trim(String)}</code> to remove leading and trailing whitespace
|
||||||
|
* and then replacing sequences of whitespace characters by a single space.
|
||||||
|
* </p>
|
||||||
|
* In XML Whitespace characters are the same as those allowed by the <a
|
||||||
|
* href="http://www.w3.org/TR/REC-xml/#NT-S">S</a> production, which is S ::= (#x20 | #x9 | #xD | #xA)+
|
||||||
|
* <p>
|
||||||
|
* Java's regexp pattern \s defines whitespace as [ \t\n\x0B\f\r]
|
||||||
|
* <p>
|
||||||
|
* For reference:
|
||||||
|
* <ul>
|
||||||
|
* <li>\x0B = vertical tab</li>
|
||||||
|
* <li>\f = #xC = form feed</li>
|
||||||
|
* <li>#x20 = space</li>
|
||||||
|
* <li>#x9 = \t</li>
|
||||||
|
* <li>#xA = \n</li>
|
||||||
|
* <li>#xD = \r</li>
|
||||||
|
* </ul>
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* The difference is that Java's whitespace includes vertical tab and form feed, which this functional will also
|
||||||
|
* normalize. Additonally <code>{@link #trim(String)}</code> removes control characters (char <= 32) from both
|
||||||
|
* ends of this String.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @see Pattern
|
||||||
|
* @see #trim(String)
|
||||||
|
* @see <a
|
||||||
|
* href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize-space</a>
|
||||||
|
* @param str the source String to normalize whitespaces from, may be null
|
||||||
|
* @return the modified string with whitespace normalized, <code>null</code> if null String input
|
||||||
|
*
|
||||||
|
* @since 3.0
|
||||||
|
*/
|
||||||
|
public static String normalizeSpace(String str) {
|
||||||
|
if(str == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return WHITESPACE_BLOCK.matcher(trim(str)).replaceAll(" ");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1855,4 +1855,17 @@ public void testStartsWithAny() {
|
||||||
assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"}));
|
assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testNormalizeSpace() {
|
||||||
|
assertEquals(null, StringUtils.normalizeSpace(null));
|
||||||
|
assertEquals("", StringUtils.normalizeSpace(""));
|
||||||
|
assertEquals("", StringUtils.normalizeSpace(" "));
|
||||||
|
assertEquals("", StringUtils.normalizeSpace("\t"));
|
||||||
|
assertEquals("", StringUtils.normalizeSpace("\n"));
|
||||||
|
assertEquals("", StringUtils.normalizeSpace("\u000B"));
|
||||||
|
assertEquals("", StringUtils.normalizeSpace("\f"));
|
||||||
|
assertEquals("", StringUtils.normalizeSpace("\r"));
|
||||||
|
assertEquals("a", StringUtils.normalizeSpace(" a "));
|
||||||
|
assertEquals("a b c", StringUtils.normalizeSpace(" a b c "));
|
||||||
|
assertEquals("a b c", StringUtils.normalizeSpace("a\t\f\r b\u000B c\n"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue