diff --git a/.cvsignore b/.cvsignore
index eb5a316cb..3a20622e3 100644
--- a/.cvsignore
+++ b/.cvsignore
@@ -1 +1,5 @@
target
+*.iws
+*.ipr
+*.iml
+dist
diff --git a/src/java/org/apache/commons/lang/StringEscapeUtils.java b/src/java/org/apache/commons/lang/StringEscapeUtils.java
new file mode 100644
index 000000000..68ff32190
--- /dev/null
+++ b/src/java/org/apache/commons/lang/StringEscapeUtils.java
@@ -0,0 +1,500 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2002-2003 The Apache Software Foundation. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution, if
+ * any, must include the following acknowlegement:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowlegement may appear in the software itself,
+ * if and wherever such third-party acknowlegements normally appear.
+ *
+ * 4. The names "The Jakarta Project", "Commons", and "Apache Software
+ * Foundation" must not be used to endorse or promote products derived
+ * from this software without prior written permission. For written
+ * permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache"
+ * nor may "Apache" appear in their names without prior written
+ * permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ *
Common String
escaping routines.
Originally from + * Turbine and the + * GenerationJavaCore library and from + * Purple Technology + *
+ * + * @author original author of escape and unescape=? + * @author Alexander Day Chaffee + * @author cybertiger@cyberiantiger.org + * @author Helge Tesgaard + * @author Sean Brown + * @since 2.0 + * @version $Id: StringEscapeUtils.java,v 1.1 2003/03/31 03:53:52 alex Exp $ + */ +public class StringEscapeUtils { + + /** + *StringEscapeUtils
instances should NOT be constructed in
+ * standard programming. Instead, the class should be used as
+ *
StringEscapeUtils.escapeJava("foo");
.
This constructor is public to permit tools that require a JavaBean + * instance to operate.
+ */ + public StringEscapeUtils() { + } + + // Java and JavaScript + //-------------------------------------------------------------------------- + /** + *Escapes any values it finds into their Java String form. + * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
+ * + *So a tab becomes the characters '\\'
and
+ * 't'
.
The only difference between Java strings and JavaScript strings + * is that in JavaScript, a single quote must be escaped.
+ * + *Example: + *
+ * input string: He didn't say, "Stop!" + * output string: He didn't say, \"Stop!\" + *+ * + * + * @param str String to escape values in + * @return String with escaped values + * @throws NullPointerException if str is
null
+ */
+ public static String escapeJava(String str) {
+ // 'false': single quotes are written through unescaped for Java output.
+ return escapeJavaStyleString(str, false);
+ }
+
+ /**
+ * Escapes the characters of a String into their Java String form and
+ * writes the result to the given Writer. Single quotes are written
+ * through unescaped.
+ *
+ * @see #escapeJava(java.lang.String)
+ * @param out Writer to write escaped string into
+ * @param str String to escape values in
+ * @throws NullPointerException if str is null
+ * @throws IOException if error occurs on underlying Writer
+ */
+ public static void escapeJava(Writer out, String str) throws IOException {
+ escapeJavaStyleString(out, str, false);
+ }
+
+ /**
+ * <p>Escapes any values it finds into their JavaScript String form.
+ * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)</p>
+ *
+ * <p>So a tab becomes the characters <code>'\\'</code> and
+ * <code>'t'</code>.</p>
+ *
+ * <p>The only difference between Java strings and JavaScript strings
+ * is that in JavaScript, a single quote must be escaped.</p>
+ *
+ * <p>Example:
+ * <pre>
+ * input string: He didn't say, "Stop!"
+ * output string: He didn\'t say, \"Stop!\"
+ * </pre>
+ * </p>
+ *
+ * @param str String to escape values in
+ * @return String with escaped values
+ * @throws NullPointerException if str is <code>null</code>
+ */
+ public static String escapeJavaScript(String str) {
+ // 'true': single quotes are escaped as well for JavaScript output.
+ return escapeJavaStyleString(str, true);
+ }
+
+ /**
+ * Escapes the characters of a String into their JavaScript String form
+ * and writes the result to the given Writer. Unlike the Java variant,
+ * single quotes are escaped too.
+ *
+ * @see #escapeJavaScript(java.lang.String)
+ * @param out Writer to write escaped string into
+ * @param str String to escape values in
+ * @throws NullPointerException if str is null
+ * @throws IOException if error occurs on underlying Writer
+ */
+ public static void escapeJavaScript(Writer out, String str) throws IOException {
+ escapeJavaStyleString(out, str, true);
+ }
+
+ /**
+  * Escapes <code>str</code> into an in-memory buffer and returns the
+  * buffer contents.
+  *
+  * @param str String to escape values in
+  * @param escapeSingleQuotes <code>true</code> to put a backslash in front
+  *  of single quotes as well
+  * @return the escaped String, or <code>null</code> if the buffer reported
+  *  an IOException
+  * @throws NullPointerException if str is <code>null</code>
+  */
+ private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes) {
+     StringPrintWriter buffer = new StringPrintWriter(str.length() * 2);
+     try {
+         escapeJavaStyleString(buffer, str, escapeSingleQuotes);
+     } catch (IOException ioe) {
+         // not expected when the target is an in-memory writer
+         ioe.printStackTrace();
+         return null;
+     }
+     return buffer.getString();
+ }
+
+ private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote) throws IOException {
+ int sz;
+ sz = str.length();
+ for (int i = 0; i < sz; i++) {
+ char ch = str.charAt(i);
+
+ // handle unicode
+ if (ch > 0xfff) {
+ out.write("\\u" + Integer.toHexString(ch));
+ } else if (ch > 0xff) {
+ out.write("\\u0" + Integer.toHexString(ch));
+ } else if (ch > 0x7f) {
+ out.write("\\u00" + Integer.toHexString(ch));
+ } else if (ch < 32) {
+ switch (ch) {
+ case '\b' :
+ out.write('\\');
+ out.write('b');
+ break;
+ case '\n' :
+ out.write('\\');
+ out.write('n');
+ break;
+ case '\t' :
+ out.write('\\');
+ out.write('t');
+ break;
+ case '\f' :
+ out.write('\\');
+ out.write('f');
+ break;
+ case '\r' :
+ out.write('\\');
+ out.write('r');
+ break;
+ default :
+ if (ch > 0xf) {
+ out.write("\\u00" + Integer.toHexString(ch));
+ } else {
+ out.write("\\u000" + Integer.toHexString(ch));
+ }
+ break;
+ }
+ } else {
+ switch (ch) {
+ case '\'' :
+ if (escapeSingleQuote) out.write('\\');
+ out.write('\'');
+ break;
+ case '"' :
+ out.write('\\');
+ out.write('"');
+ break;
+ case '\\' :
+ out.write('\\');
+ out.write('\\');
+ break;
+ default :
+ out.write(ch);
+ break;
+ }
+ }
+ }
+ }
+
+ /**
+ * Unescapes any Java literals found in the String. For example,
+ * it will turn a sequence of '\' and 'n' into a newline character,
+ * unless the '\' is preceded by another '\'.
+ */
+ public static String unescapeJava(String str) {
+ try {
+ StringPrintWriter writer = new StringPrintWriter(str.length());
+ unescapeJava(writer, str);
+ return writer.getString();
+ }
+ catch (IOException ioe) {
+ // this should never ever happen while writing to a StringWriter
+ ioe.printStackTrace();
+ return null;
+ }
+ }
+
+ public static void unescapeJava(Writer out, String str) throws IOException {
+ int sz = str.length();
+ StringBuffer unicode = new StringBuffer(4);
+ boolean hadSlash = false;
+ boolean inUnicode = false;
+ for (int i = 0; i < sz; i++) {
+ char ch = str.charAt(i);
+ if(inUnicode) {
+ // if in unicode, then we're reading unicode
+ // values in somehow
+ if(unicode.length() == 4) {
+ // unicode now contains the four hex digits
+ // which represents our unicode chacater
+ try {
+ int value = Integer.parseInt(unicode.toString(), 16);
+ out.write( (char)value );
+ unicode.setLength(0);
+ unicode.setLength(4);
+ inUnicode = false;
+ hadSlash = false;
+ } catch(NumberFormatException nfe) {
+ throw new NestableRuntimeException("Unable to parse unicode value: "+unicode, nfe);
+ }
+ } else {
+ unicode.append(ch);
+ continue;
+ }
+ }
+ if(hadSlash) {
+ // handle an escaped value
+ hadSlash = false;
+ switch(ch) {
+ case '\\': out.write('\\'); break;
+ case '\'': out.write('\''); break;
+ case '\"': out.write('"'); break;
+ case 'r': out.write('\r'); break;
+ case 'f': out.write('\f'); break;
+ case 't': out.write('\t'); break;
+ case 'n': out.write('\n'); break;
+ case 'b': out.write('\b'); break;
+ case 'u': {
+ // uh-oh, we're in unicode country....
+ inUnicode=true;
+ break;
+ }
+ default :
+ out.write(ch);
+ break;
+ }
+ continue;
+ } else
+ if(ch == '\\') {
+ hadSlash = true;
+ continue;
+ }
+ out.write(ch);
+ }
+ if(hadSlash) {
+ // then we're in the weird case of a \ at the end of the
+ // string, let's output it anyway.
+ out.write('\\');
+ }
+ }
+
+ /**
+ * Unescapes any JavaScript literals found in the String. This simply
+ * delegates to {@link #unescapeJava(String)}, so exactly the same escape
+ * sequences are recognised.
+ *
+ * @param str String to unescape
+ * @return whatever <code>unescapeJava(String)</code> returns for str
+ */
+ public static String unescapeJavaScript(String str) {
+ return unescapeJava(str);
+ }
+
+ /**
+ * Unescapes any JavaScript literals found in the String and writes the
+ * result to the given Writer. This simply delegates to
+ * {@link #unescapeJava(Writer, String)}.
+ *
+ * @param out Writer to write the unescaped string into
+ * @param str String to unescape
+ * @throws IOException if error occurs on underlying Writer
+ */
+ public static void unescapeJavaScript(Writer out, String str) throws IOException {
+ unescapeJava(out,str);
+ }
+
+ // HTML and XML
+ //--------------------------------------------------------------------------
+
+ // see http://hotwired.lycos.com/webmonkey/reference/special_characters/
+ //todo: initialize these lazily (on first request, rather than at classload time)
+ // Table of named character entities. Each row is
+ // { entity name (String), character code (Integer) }; every name appears once.
+ static Object[][] entities = {
+     // {"#39", new Integer(39)}, // ' - apostrophe
+     {"quot", new Integer(34)}, // " - double-quote
+     {"amp", new Integer(38)}, // & - ampersand
+     {"lt", new Integer(60)}, // < - less-than
+     {"gt", new Integer(62)}, // > - greater-than
+     {"nbsp", new Integer(160)}, // non-breaking space
+     {"copy", new Integer(169)}, // © - copyright
+     {"reg", new Integer(174)}, // ® - registered trademark
+     {"Agrave", new Integer(192)}, // À - uppercase A, grave accent
+     {"Aacute", new Integer(193)}, // Á - uppercase A, acute accent
+     {"Acirc", new Integer(194)}, // Â - uppercase A, circumflex accent
+     {"Atilde", new Integer(195)}, // Ã - uppercase A, tilde
+     {"Auml", new Integer(196)}, // Ä - uppercase A, umlaut
+     {"Aring", new Integer(197)}, // Å - uppercase A, ring
+     {"AElig", new Integer(198)}, // Æ - uppercase AE
+     {"Ccedil", new Integer(199)}, // Ç - uppercase C, cedilla
+     {"Egrave", new Integer(200)}, // È - uppercase E, grave accent
+     {"Eacute", new Integer(201)}, // É - uppercase E, acute accent
+     {"Ecirc", new Integer(202)}, // Ê - uppercase E, circumflex accent
+     {"Euml", new Integer(203)}, // Ë - uppercase E, umlaut
+     {"Igrave", new Integer(204)}, // Ì - uppercase I, grave accent
+     {"Iacute", new Integer(205)}, // Í - uppercase I, acute accent
+     {"Icirc", new Integer(206)}, // Î - uppercase I, circumflex accent
+     {"Iuml", new Integer(207)}, // Ï - uppercase I, umlaut
+     {"ETH", new Integer(208)}, // Ð - uppercase Eth, Icelandic
+     {"Ntilde", new Integer(209)}, // Ñ - uppercase N, tilde
+     {"Ograve", new Integer(210)}, // Ò - uppercase O, grave accent
+     {"Oacute", new Integer(211)}, // Ó - uppercase O, acute accent
+     {"Ocirc", new Integer(212)}, // Ô - uppercase O, circumflex accent
+     {"Otilde", new Integer(213)}, // Õ - uppercase O, tilde
+     {"Ouml", new Integer(214)}, // Ö - uppercase O, umlaut
+     {"Oslash", new Integer(216)}, // Ø - uppercase O, slash
+     {"Ugrave", new Integer(217)}, // Ù - uppercase U, grave accent
+     {"Uacute", new Integer(218)}, // Ú - uppercase U, acute accent
+     {"Ucirc", new Integer(219)}, // Û - uppercase U, circumflex accent
+     {"Uuml", new Integer(220)}, // Ü - uppercase U, umlaut
+     {"Yacute", new Integer(221)}, // Ý - uppercase Y, acute accent
+     {"THORN", new Integer(222)}, // Þ - uppercase THORN, Icelandic
+     {"szlig", new Integer(223)}, // ß - lowercase sharps, German
+     {"agrave", new Integer(224)}, // à - lowercase a, grave accent
+     {"aacute", new Integer(225)}, // á - lowercase a, acute accent
+     {"acirc", new Integer(226)}, // â - lowercase a, circumflex accent
+     {"atilde", new Integer(227)}, // ã - lowercase a, tilde
+     {"auml", new Integer(228)}, // ä - lowercase a, umlaut
+     {"aring", new Integer(229)}, // å - lowercase a, ring
+     {"aelig", new Integer(230)}, // æ - lowercase ae
+     {"ccedil", new Integer(231)}, // ç - lowercase c, cedilla
+     {"egrave", new Integer(232)}, // è - lowercase e, grave accent
+     {"eacute", new Integer(233)}, // é - lowercase e, acute accent
+     {"ecirc", new Integer(234)}, // ê - lowercase e, circumflex accent
+     {"euml", new Integer(235)}, // ë - lowercase e, umlaut
+     {"igrave", new Integer(236)}, // ì - lowercase i, grave accent
+     {"iacute", new Integer(237)}, // í - lowercase i, acute accent
+     {"icirc", new Integer(238)}, // î - lowercase i, circumflex accent
+     {"iuml", new Integer(239)}, // ï - lowercase i, umlaut
+     {"eth", new Integer(240)}, // ð - lowercase eth, Icelandic
+     {"ntilde", new Integer(241)}, // ñ - lowercase n, tilde
+     {"ograve", new Integer(242)}, // ò - lowercase o, grave accent
+     {"oacute", new Integer(243)}, // ó - lowercase o, acute accent
+     {"ocirc", new Integer(244)}, // ô - lowercase o, circumflex accent
+     {"otilde", new Integer(245)}, // õ - lowercase o, tilde
+     {"ouml", new Integer(246)}, // ö - lowercase o, umlaut
+     {"oslash", new Integer(248)}, // ø - lowercase o, slash
+     {"ugrave", new Integer(249)}, // ù - lowercase u, grave accent
+     {"uacute", new Integer(250)}, // ú - lowercase u, acute accent
+     {"ucirc", new Integer(251)}, // û - lowercase u, circumflex accent
+     {"uuml", new Integer(252)}, // ü - lowercase u, umlaut
+     {"yacute", new Integer(253)}, // ý - lowercase y, acute accent
+     {"thorn", new Integer(254)}, // þ - lowercase thorn, Icelandic
+     {"yuml", new Integer(255)}, // ÿ - lowercase y, umlaut
+     {"euro", new Integer(8364)}, // Euro symbol
+ };
+ static Map e2i = new HashMap();
+ static Map i2e = new HashMap();
+ static {
+ for (int i=0; i+ * StringPrintWriter out = new StringPrintWriter(); + * printTo(out); + * System.out.println( out.getString() ); + *+ * @author Alex Chaffee + * @author Scott Stanchfield + **/ +public class StringPrintWriter extends PrintWriter { + public StringPrintWriter() { + super(new StringWriter()); + } + + public StringPrintWriter(int initialSize) { + super(new StringWriter(initialSize)); + } + + /** + * Since toString() returns information *about* this object, we + * want a separate method to extract just the contents of the + * internal buffer as a String. + * @return the contents of the internal string buffer + */ + public String getString() { + flush(); + return ((StringWriter) out).toString(); + } +} + diff --git a/src/test/org/apache/commons/lang/LangTestSuite.java b/src/test/org/apache/commons/lang/LangTestSuite.java index e21464171..201dd4885 100644 --- a/src/test/org/apache/commons/lang/LangTestSuite.java +++ b/src/test/org/apache/commons/lang/LangTestSuite.java @@ -63,7 +63,7 @@ import junit.textui.TestRunner; * * @author Stephen Colebourne * @author Ringo De Smet - * @version $Id: LangTestSuite.java,v 1.13 2003/03/23 21:49:13 scolebourne Exp $ + * @version $Id: LangTestSuite.java,v 1.14 2003/03/31 03:53:52 alex Exp $ */ public class LangTestSuite extends TestCase { @@ -101,6 +101,7 @@ public class LangTestSuite extends TestCase { suite.addTest(StringUtilsSubstringTest.suite()); suite.addTest(StringUtilsEqualsIndexOfTest.suite()); suite.addTest(StringUtilsIsTest.suite()); + suite.addTest(StringEscapeUtilsTest.suite()); return suite; } } diff --git a/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java b/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java new file mode 100644 index 000000000..586962725 --- /dev/null +++ b/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java @@ -0,0 +1,182 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright 
(c) 2002-2003 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "The Jakarta Project", "Commons", and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + *