From 823f2603a4de316b1ae4d5d6d4ed0175f6edc985 Mon Sep 17 00:00:00 2001
From: "James W. Carman" Operations on {@link java.lang.String} that are
@@ -156,6 +157,11 @@ public class StringUtils {
*/
private static final int PAD_LIMIT = 8192;
+ /**
+ * Precompiled regex matching a run of one or more whitespace characters ({@code \s+}).
+ */
+ private static final Pattern WHITESPACE_BLOCK = Pattern.compile("\\s+");
+
/**
*
+ * Similar to
+ * http://www.w3.org/TR/xpath/#function-normalize-space
+ *
+ * The function returns the argument string with whitespace normalized by using
+ *
+ * Java's regexp pattern \s defines whitespace as [ \t\n\x0B\f\r]
+ *
+ * For reference:
+ * StringUtils
instances should NOT be constructed in
* standard programming. Instead, the class should be used as
@@ -6257,4 +6263,52 @@ private static boolean endsWith(String str, String suffix, boolean ignoreCase) {
int strOffset = str.length() - suffix.length();
return str.regionMatches(ignoreCase, strOffset, suffix, 0, suffix.length());
}
+
+ /**
+ * {@link #trim(String)}
to remove leading and trailing whitespace
+ * and then replacing sequences of whitespace characters by a single space.
+ *
+ *
+ *
+ * The difference is that Java's whitespace includes vertical tab and form feed, which this function will also
+ * normalize. Additionally, {@link #trim(String)}
removes control characters (char <= 32) from both
+ * ends of this String.
+ *
null
if null String input
+ *
+ * @since 3.0
+ */
+ public static String normalizeSpace(String str) {
+ if(str == null) { // null input is passed straight through as null
+ return null;
+ }
+ return WHITESPACE_BLOCK.matcher(trim(str)).replaceAll(" "); // trim the ends first, then replace each whitespace run with a single space
+ }
}
diff --git a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
index d95b06066..6f044d0a6 100644
--- a/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
+++ b/src/test/java/org/apache/commons/lang3/StringUtilsTest.java
@@ -1855,4 +1855,17 @@ public void testStartsWithAny() {
assertFalse(StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abcd"}));
}
+ public void testNormalizeSpace() { // exercises StringUtils.normalizeSpace on null, empty, whitespace-only and mixed input
+ assertEquals(null, StringUtils.normalizeSpace(null)); // null in, null out
+ assertEquals("", StringUtils.normalizeSpace("")); // empty string stays empty
+ assertEquals("", StringUtils.normalizeSpace(" ")); // each whitespace-only input below is expected to yield ""
+ assertEquals("", StringUtils.normalizeSpace("\t"));
+ assertEquals("", StringUtils.normalizeSpace("\n"));
+ assertEquals("", StringUtils.normalizeSpace("\u000B"));
+ assertEquals("", StringUtils.normalizeSpace("\f"));
+ assertEquals("", StringUtils.normalizeSpace("\r"));
+ assertEquals("a", StringUtils.normalizeSpace(" a ")); // surrounding whitespace is removed
+ assertEquals("a b c", StringUtils.normalizeSpace(" a b c ")); // words end up separated by exactly one space
+ assertEquals("a b c", StringUtils.normalizeSpace("a\t\f\r b\u000B c\n")); // mixed whitespace characters between words become single spaces
+ }
}