Added the StringUtils.unescape method, UnitTest and STATUS change.

PR: Obtained from: Submitted by: Reviewed by: git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/lang/trunk@137169 13f79535-47bb-0310-9956-ffa450edef68
2002-12-07 21:50:30 +00:00 · 2002-12-07 21:50:30 +00:00 · 3e88bc3ca5
parent f682f4b0f0
commit 3e88bc3ca5
3 changed files with 86 additions and 5 deletions
--- a/STATUS.html
+++ b/STATUS.html
@ -7,7 +7,7 @@

 <div align="center">
 <h1>The Jakarta Commons <em>Lang</em> Component</h1>
-$Id: STATUS.html,v 1.27 2002/11/15 00:07:26 scolebourne Exp $<br />
+$Id: STATUS.html,v 1.28 2002/12/07 21:50:29 bayard Exp $<br />
 <a href="#Introduction">[Introduction]</a>
 <a href="#Dependencies">[Dependencies]</a>
 <a href="#Release Info">[Release Info]</a>
@ -72,6 +72,7 @@ A 2.0 will be released after a undefined period of development.
 <ul>
 <li>CharRange.UNSET - will have problems if we introduce reverse ranges that go down to \u0000.</lI>
 <li>Null effects - the classes are not standardised in how they handle null.</li>
+<li>When running the TestFactoryUtils test, sometimes the CPU speed is not quick enough and 'assertEquals((double) System.currentTimeMillis(), (double) ((Date) created).getTime(), 0.01d);' fails. </li>
 </ul>
 </p>

@ -90,7 +91,7 @@ still under discussion, so please mail the list before actioning.</p>
 <li>DateRange</li>
 <li>CloneUtils - utility class to enable cloning via various different mechanisms. This code exists in [pattern] at present.</li>
 <li>StringUtils truncateNicely method - A substring with some extra power to choose where to cut off. It was in Avalon and was added separately to String Taglib from a code submission. This suggests it may have some commonality. [CODED]</li>
-<li>StringUtils unescape method - String Taglib has shown that this method is missing from StringUtils. It would take a String with "\n" in and convert it to the Java character. unescape and escape should be symmetric. </li>
+<li>StringUtils unescape method - String Taglib has shown that this method is missing from StringUtils. It would take a String with "\n" in and convert it to the Java character. unescape and escape should be symmetric - [DONE. Test symmetry] </li>
 <li>ArrayUtils - opinion seems to be that this belongs with [lang] and not [collections]
 <li>GUID and other Identifier generators - these may belong in [util], some code exists in [pattern] at the moment
 <li>CharUtils - Utilities to work on a char[] in the same way as a String
--- a/src/java/org/apache/commons/lang/StringUtils.java
+++ b/src/java/org/apache/commons/lang/StringUtils.java
@ -55,9 +55,10 @@ package org.apache.commons.lang;
 */

 import java.util.StringTokenizer;
-
 import java.util.Iterator;

+import org.apache.commons.lang.exception.NestableRuntimeException;
+
 /**
 * <p>Common <code>String</code> manipulation routines.</p>
 *
@ -73,7 +74,7 @@ import java.util.Iterator;
 * @author <a href="mailto:rand_mcneely@yahoo.com">Rand McNeely</a>
 * @author <a href="mailto:scolebourne@joda.org">Stephen Colebourne</a>
 * @author <a href="mailto:fredrik@westermarck.com">Fredrik Westermarck</a>
- * @version $Id: StringUtils.java,v 1.27 2002/11/27 22:54:29 bayard Exp $
+ * @version $Id: StringUtils.java,v 1.28 2002/12/07 21:50:29 bayard Exp $
 */
 public class StringUtils {

@ -956,6 +957,77 @@ public class StringUtils {
        return buffer.toString();
    }

+    /**
+     * <p>Unescapes any Java literals found in the <code>String</code>.</p>
+     *
+     * <p>For example, it will turn a sequence of '\' and 'n' into a
+     * newline character, unless the '\' is preceded by another '\'.
+     * A '\' followed by 'u' and four hex digits is turned into the
+     * character with that code. A '\' followed by any other character
+     * yields that character unchanged, and a lone '\' at the very end
+     * of the input is kept as-is. A unicode escape that is cut short by
+     * the end of the input (fewer than four digits) is discarded.</p>
+     *
+     * @param str  the String to unescape, may be <code>null</code>
+     * @return the unescaped String, or <code>null</code> if the input
+     *  was <code>null</code>
+     * @throws NestableRuntimeException if the four characters following
+     *  a '\' and 'u' cannot be parsed as a hexadecimal number
+     */
+    public static String unescape(String str) {
+        if (str == null) {
+            return null;
+        }
+        int sz = str.length();
+        StringBuffer buffer = new StringBuffer(sz);
+        StringBuffer unicode = new StringBuffer(4);
+        boolean hadSlash = false;
+        boolean inUnicode = false;
+        for (int i = 0; i < sz; i++) {
+            char ch = str.charAt(i);
+            if (inUnicode) {
+                // collecting the four hex digits of a unicode escape
+                unicode.append(ch);
+                if (unicode.length() == 4) {
+                    // decode as soon as the fourth digit arrives, so an
+                    // escape that ends the input is not lost
+                    try {
+                        int value = Integer.parseInt(unicode.toString(), 16);
+                        buffer.append((char) value);
+                    } catch (NumberFormatException nfe) {
+                        throw new NestableRuntimeException("Unable to parse unicode value: "+unicode, nfe);
+                    }
+                    // empty the digit buffer so a later escape starts clean
+                    unicode.setLength(0);
+                    inUnicode = false;
+                }
+                continue;
+            }
+            if (hadSlash) {
+                // handle an escaped value
+                hadSlash = false;
+                switch (ch) {
+                    case '\\': buffer.append('\\'); break;
+                    case '\'': buffer.append('\''); break;
+                    case '\"': buffer.append('"'); break;
+                    case 'r':  buffer.append('\r'); break;
+                    case 'f':  buffer.append('\f'); break;
+                    case 't':  buffer.append('\t'); break;
+                    case 'n':  buffer.append('\n'); break;
+                    case 'b':  buffer.append('\b'); break;
+                    case 'u':
+                        // the next four characters are hex digits
+                        inUnicode = true;
+                        break;
+                    default :
+                        // unknown escape: drop the slash, keep the character
+                        buffer.append(ch);
+                        break;
+                }
+                continue;
+            }
+            if (ch == '\\') {
+                hadSlash = true;
+                continue;
+            }
+            buffer.append(ch);
+        }
+        if (hadSlash) {
+            // then we're in the weird case of a \ at the end of the
+            // string, let's output it anyway.
+            buffer.append('\\');
+        }
+        return buffer.toString();
+    }
+
    // Padding
    //--------------------------------------------------------------------------
    
--- a/src/test/org/apache/commons/lang/StringUtilsTest.java
+++ b/src/test/org/apache/commons/lang/StringUtilsTest.java
@ -68,7 +68,7 @@ import junit.textui.TestRunner;
 * @author <a href="mailto:scolebourne@joda.org">Stephen Colebourne</a>
 * @author <a href="mailto:ridesmet@users.sourceforge.net">Ringo De Smet</a>
 * @author <a href="mailto:fredrik@westermarck.com>Fredrik Westermarck</a>
- * @version $Id: StringUtilsTest.java,v 1.10 2002/11/23 00:51:34 bayard Exp $
+ * @version $Id: StringUtilsTest.java,v 1.11 2002/12/07 21:50:30 bayard Exp $
 */
 public class StringUtilsTest extends TestCase
 {
@ -317,6 +317,14 @@ public class StringUtilsTest extends TestCase
                     "\\u0234", StringUtils.escape("\u0234") );
        assertEquals("escape(String) failed",
                     "\\u00fd", StringUtils.escape("\u00fd") );
+        assertEquals("unescape(String) failed", 
+                     "", StringUtils.unescape("") );
+        assertEquals("unescape(String) failed", 
+                     "test", StringUtils.unescape("test") );
+        assertEquals("unescape(String) failed", 
+                     "\ntest\b", StringUtils.unescape("\\ntest\\b") );
+        assertEquals("unescape(String) failed", 
+                     "\u123425foo\ntest\b", StringUtils.unescape("\\u123425foo\\ntest\\b") );
    }

    public void testGetLevenshteinDistance() {