From 0714795b6fd29dd895d65a6e7397ee7f768cb1bf Mon Sep 17 00:00:00 2001 From: Henri Yandell Date: Tue, 17 Nov 2009 08:45:55 +0000 Subject: [PATCH] Removing the EscapeUtils/UnescapeUtils classes, and making StringEscapeUtils the replacement. Writer variants of StringEscapeUtils are dropped - instead you hit those via the translator objects. Some javadoc/testing cleanup needed. git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@881197 13f79535-47bb-0310-9956-ffa450edef68 --- .../commons/lang/StringEscapeUtils.java | 464 ++++++++---------- .../lang/text/translate/EscapeUtils.java | 132 ----- .../lang/text/translate/UnescapeUtils.java | 131 ----- .../commons/lang/StringEscapeUtilsTest.java | 82 ++-- .../lang/text/translate/EscapeUtilsTest.java | 30 -- .../text/translate/UnescapeUtilsTest.java | 30 -- 6 files changed, 243 insertions(+), 626 deletions(-) delete mode 100644 src/java/org/apache/commons/lang/text/translate/EscapeUtils.java delete mode 100644 src/java/org/apache/commons/lang/text/translate/UnescapeUtils.java delete mode 100644 src/test/org/apache/commons/lang/text/translate/EscapeUtilsTest.java delete mode 100644 src/test/org/apache/commons/lang/text/translate/UnescapeUtilsTest.java diff --git a/src/java/org/apache/commons/lang/StringEscapeUtils.java b/src/java/org/apache/commons/lang/StringEscapeUtils.java index 95aef04e8..10ec150f5 100644 --- a/src/java/org/apache/commons/lang/StringEscapeUtils.java +++ b/src/java/org/apache/commons/lang/StringEscapeUtils.java @@ -19,8 +19,11 @@ import java.io.IOException; import java.io.Writer; -import org.apache.commons.lang.text.translate.EscapeUtils; -import org.apache.commons.lang.text.translate.UnescapeUtils; +import org.apache.commons.lang.text.translate.*; + +// CsvTranslators +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.CharUtils; /** *

Escapes and unescapes Strings for @@ -38,12 +41,162 @@ * @author Pete Gieser * @since 2.0 * @version $Id$ - * - * @deprecated Use text.translate.EscapeUtils and text.translate.UnescapeUtils instead */ -@Deprecated public class StringEscapeUtils { + /* ESCAPE TRANSLATORS */ + + public static final CharSequenceTranslator ESCAPE_JAVA = + new LookupTranslator( + new String[][] { + {"\"", "\\\""}, + {"\\", "\\\\"}, + }).with( + new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()) + ).with( + UnicodeEscaper.outsideOf(32, 0x7f) + ); + + public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = + new AggregateTranslator( + new LookupTranslator( + new String[][] { + {"'", "\\'"}, + {"\"", "\\\""}, + {"\\", "\\\\"}, + {"/", "\\/"} + }), + new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), + UnicodeEscaper.outsideOf(32, 0x7f) + ); + + public static final CharSequenceTranslator ESCAPE_XML = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_ESCAPE()), + new LookupTranslator(EntityArrays.APOS_ESCAPE()) + ); + + public static final CharSequenceTranslator ESCAPE_HTML3 = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_ESCAPE()), + new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()) + ); + + public static final CharSequenceTranslator ESCAPE_HTML4 = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_ESCAPE()), + new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), + new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()) + ); + + public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); + + // TODO: Create a parent class - 'SinglePassTranslator' ? + // TODO: It would handle the index checking, and length returning, and + // TODO: could also have an optimization check method. + static class CsvEscaper extends CharSequenceTranslator { + + private static final char CSV_DELIMITER = ','; + private static final char CSV_QUOTE = '"'; + private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); + private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; + + // TODO: Replace with a RegexTranslator. That should consume the number of characters the regex uses up? + @Override + public int translate(CharSequence input, int index, Writer out) throws IOException { + + if(index != 0) { + throw new IllegalStateException("CsvEscaper should never reach the [1] index"); + } + + if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { + out.write(input.toString()); + } else { + out.write(CSV_QUOTE); + out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); + out.write(CSV_QUOTE); + } + return input.length(); + } + } + + /* UNESCAPE TRANSLATORS */ + + // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)? + public static final CharSequenceTranslator UNESCAPE_JAVA = + new AggregateTranslator( + new UnicodeUnescaper(), + new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), + new LookupTranslator( + new String[][] { + {"\\\\", "\\"}, + {"\\\"", "\""}, + {"\\'", "'"}, + {"\\", ""} + }) + ); + + public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; + + public static final CharSequenceTranslator UNESCAPE_HTML3 = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), + new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), + new NumericEntityUnescaper() + ); + + public static final CharSequenceTranslator UNESCAPE_HTML4 = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), + new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), + new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), + new NumericEntityUnescaper() + ); + + public static final CharSequenceTranslator UNESCAPE_XML = + new AggregateTranslator( + new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), + new LookupTranslator(EntityArrays.APOS_UNESCAPE()), + new NumericEntityUnescaper() + ); + + public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); + + static class CsvUnescaper extends CharSequenceTranslator { + + private static final char CSV_DELIMITER = ','; + private static final char CSV_QUOTE = '"'; + private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); + private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; + + // TODO: Replace with a RegexTranslator. That should consume the number of characters the regex uses up? + @Override + public int translate(CharSequence input, int index, Writer out) throws IOException { + + if(index != 0) { + throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); + } + + if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) { + out.write(input.toString()); + return input.length(); + } + + // strip quotes + String quoteless = input.subSequence(1, input.length() - 1).toString(); + + if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { + // deal with escaped quotes; ie) "" + out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); + } else { + out.write(input.toString()); + } + return input.length(); + } + } + + /* Helper functions */ + /** *

StringEscapeUtils instances should NOT be constructed in * standard programming.

@@ -78,39 +231,25 @@ public StringEscapeUtils() { * *

* - * @param str String to escape values in, may be null + * @param input String to escape values in, may be null * @return String with escaped values, null if null string input */ - public static String escapeJava(String str) { - return EscapeUtils.escapeJava(str); + public static final String escapeJava(String input) { + return ESCAPE_JAVA.translate(input); } /** - *

Escapes the characters in a String using Java String rules to - * a Writer.

- * - *

A null string input has no effect.

- * - * @see #escapeJava(java.lang.String) - * @param out Writer to write escaped string into - * @param str String to escape values in, may be null - * @throws IllegalArgumentException if the Writer is null - * @throws IOException if error occurs on underlying Writer - */ - public static void escapeJava(Writer out, String str) throws IOException { - EscapeUtils.ESCAPE_JAVA.translate(str, out); - } - - /** - *

Escapes the characters in a String using JavaScript String rules.

- *

Escapes any values it finds into their JavaScript String form. + *

Escapes the characters in a String using EcmaScript String rules.

+ *

Escapes any values it finds into their EcmaScript String form. * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)

* *

So a tab becomes the characters '\\' and * 't'.

* - *

The only difference between Java strings and JavaScript strings - * is that in JavaScript, a single quote and forward-slash (/) are escaped.

+ *

The only difference between Java strings and EcmaScript strings + * is that in EcmaScript, a single quote and forward-slash (/) are escaped.

+ * + *

Note that EcmaScript is best known by the JavaScript and ActionScript dialects.

* *

Example: *

@@ -119,27 +258,11 @@ public static void escapeJava(Writer out, String str) throws IOException {
      * 
*

* - * @param str String to escape values in, may be null + * @param input String to escape values in, may be null * @return String with escaped values, null if null string input */ - public static String escapeJavaScript(String str) { - return EscapeUtils.escapeEcmaScript(str); - } - - /** - *

Escapes the characters in a String using JavaScript String rules - * to a Writer.

- * - *

A null string input has no effect.

- * - * @see #escapeJavaScript(java.lang.String) - * @param out Writer to write escaped string into - * @param str String to escape values in, may be null - * @throws IllegalArgumentException if the Writer is null - * @throws IOException if error occurs on underlying Writer - **/ - public static void escapeJavaScript(Writer out, String str) throws IOException { - EscapeUtils.ESCAPE_ECMASCRIPT.translate(str, out); + public static final String escapeEcmaScript(String input) { + return ESCAPE_ECMASCRIPT.translate(input); } /** @@ -148,65 +271,26 @@ public static void escapeJavaScript(Writer out, String str) throws IOException { * 'n' into a newline character, unless the '\' * is preceded by another '\'.

* - * @param str the String to unescape, may be null + * @param input the String to unescape, may be null * @return a new unescaped String, null if null string input */ - public static String unescapeJava(String str) { - return UnescapeUtils.unescapeJava(str); + public static final String unescapeJava(String input) { + return UNESCAPE_JAVA.translate(input); } /** - *

Unescapes any Java literals found in the String to a - * Writer.

- * - *

For example, it will turn a sequence of '\' and - * 'n' into a newline character, unless the '\' - * is preceded by another '\'.

- * - *

A null string input has no effect.

- * - * @param out the Writer used to output unescaped characters - * @param str the String to unescape, may be null - * @throws IllegalArgumentException if the Writer is null - * @throws IOException if error occurs on underlying Writer - */ - public static void unescapeJava(Writer out, String str) throws IOException { - UnescapeUtils.UNESCAPE_JAVA.translate(str, out); - } - - /** - *

Unescapes any JavaScript literals found in the String.

+ *

Unescapes any EcmaScript literals found in the String.

* *

For example, it will turn a sequence of '\' and 'n' * into a newline character, unless the '\' is preceded by another * '\'.

* * @see #unescapeJava(String) - * @param str the String to unescape, may be null + * @param input the String to unescape, may be null * @return A new unescaped String, null if null string input */ - public static String unescapeJavaScript(String str) { - return UnescapeUtils.unescapeEcmaScript(str); - } - - /** - *

Unescapes any JavaScript literals found in the String to a - * Writer.

- * - *

For example, it will turn a sequence of '\' and 'n' - * into a newline character, unless the '\' is preceded by another - * '\'.

- * - *

A null string input has no effect.

- * - * @see #unescapeJava(Writer,String) - * @param out the Writer used to output unescaped characters - * @param str the String to unescape, may be null - * @throws IllegalArgumentException if the Writer is null - * @throws IOException if error occurs on underlying Writer - */ - public static void unescapeJavaScript(Writer out, String str) throws IOException { - UnescapeUtils.UNESCAPE_ECMASCRIPT.translate(str, out); + public static final String unescapeEcmaScript(String input) { + return UNESCAPE_ECMASCRIPT.translate(input); } // HTML and XML @@ -227,7 +311,7 @@ public static void unescapeJavaScript(Writer out, String str) throws IOException * Note that the commonly used apostrophe escape character (') * is not a legal entity and so is not supported).

* - * @param str the String to escape, may be null + * @param input the String to escape, may be null * @return a new escaped String, null if null string input * * @see #unescapeHtml(String) @@ -237,43 +321,14 @@ public static void unescapeJavaScript(Writer out, String str) throws IOException * @see HTML 4.01 Character References * @see HTML 4.01 Code positions */ - public static String escapeHtml(String str) { - return EscapeUtils.escapeHtml4(str); + public static final String escapeHtml4(String input) { + return ESCAPE_HTML4.translate(input); } - /** - *

Escapes the characters in a String using HTML entities and writes - * them to a Writer.

- * - *

- * For example: - *

- * "bread" & "butter" - *

becomes:

- * "bread" & "butter". - * - *

Supports all known HTML 4.0 entities, including funky accents. - * Note that the commonly used apostrophe escape character (') - * is not a legal entity and so is not supported).

- * - * @param writer the writer receiving the escaped string, not null - * @param string the String to escape, may be null - * @throws IllegalArgumentException if the writer is null - * @throws IOException when Writer passed throws the exception from - * calls to the {@link Writer#write(int)} methods. - * - * @see #escapeHtml(String) - * @see #unescapeHtml(String) - * @see ISO Entities - * @see HTML 3.2 Character Entities for ISO Latin-1 - * @see HTML 4.0 Character entity references - * @see HTML 4.01 Character References - * @see HTML 4.01 Code positions - */ - public static void escapeHtml(Writer writer, String string) throws IOException { - EscapeUtils.ESCAPE_HTML4.translate(string, writer); + public static final String escapeHtml3(String input) { + return ESCAPE_HTML3.translate(input); } - + //----------------------------------------------------------------------- /** *

Unescapes a string containing entity escapes to a string @@ -287,34 +342,15 @@ public static void escapeHtml(Writer writer, String string) throws IOException { * verbatim into the result string. e.g. ">&zzzz;x" will * become ">&zzzz;x".

* - * @param str the String to unescape, may be null + * @param input the String to unescape, may be null * @return a new unescaped String, null if null string input - * @see #escapeHtml(Writer, String) */ - public static String unescapeHtml(String str) { - return UnescapeUtils.unescapeHtml4(str); + public static final String unescapeHtml4(String input) { + return UNESCAPE_HTML4.translate(input); } - /** - *

Unescapes a string containing entity escapes to a string - * containing the actual Unicode characters corresponding to the - * escapes. Supports HTML 4.0 entities.

- * - *

For example, the string "&lt;Fran&ccedil;ais&gt;" - * will become "<Français>"

- * - *

If an entity is unrecognized, it is left alone, and inserted - * verbatim into the result string. e.g. "&gt;&zzzz;x" will - * become ">&zzzz;x".

- * - * @param writer the writer receiving the unescaped string, not null - * @param string the String to unescape, may be null - * @throws IllegalArgumentException if the writer is null - * @throws IOException if an IOException occurs - * @see #escapeHtml(String) - */ - public static void unescapeHtml(Writer writer, String string) throws IOException { - UnescapeUtils.UNESCAPE_HTML4.translate(string, writer); + public static final String unescapeHtml3(String input) { + return UNESCAPE_HTML3.translate(input); } //----------------------------------------------------------------------- @@ -331,36 +367,14 @@ public static void unescapeHtml(Writer writer, String string) throws IOException *

Note that unicode characters greater than 0x7f are as of 3.0, no longer * escaped.

* - * @param writer the writer receiving the unescaped string, not null - * @param str the String to escape, may be null - * @throws IllegalArgumentException if the writer is null - * @throws IOException if there is a problem writing - * @see #unescapeXml(java.lang.String) - */ - public static void escapeXml(Writer writer, String str) throws IOException { - EscapeUtils.ESCAPE_XML.translate(str, writer); - } - - /** - *

Escapes the characters in a String using XML entities.

- * - *

For example: "bread" & "butter" => - * &quot;bread&quot; &amp; &quot;butter&quot;. - *

- * - *

Supports only the five basic XML entities (gt, lt, quot, amp, apos). - * Does not support DTDs or external entities.

- * - *

Note that unicode characters greater than 0x7f are as of 3.0, no longer - * escaped.

- * - * @param str the String to escape, may be null + * @param input the String to escape, may be null * @return a new escaped String, null if null string input * @see #unescapeXml(java.lang.String) */ - public static String escapeXml(String str) { - return EscapeUtils.escapeXml(str); + public static final String escapeXml(String input) { + return ESCAPE_XML.translate(input); } + //----------------------------------------------------------------------- /** @@ -374,34 +388,14 @@ public static String escapeXml(String str) { *

Note that numerical \\u unicode codes are unescaped to their respective * unicode characters. This may change in future releases.

* - * @param writer the writer receiving the unescaped string, not null - * @param str the String to unescape, may be null - * @throws IllegalArgumentException if the writer is null - * @throws IOException if there is a problem writing - * @see #escapeXml(String) - */ - public static void unescapeXml(Writer writer, String str) throws IOException { - UnescapeUtils.UNESCAPE_XML.translate(str, writer); - } - - /** - *

Unescapes a string containing XML entity escapes to a string - * containing the actual Unicode characters corresponding to the - * escapes.

- * - *

Supports only the five basic XML entities (gt, lt, quot, amp, apos). - * Does not support DTDs or external entities.

- * - *

Note that numerical \\u unicode codes are unescaped to their respective - * unicode characters. This may change in future releases.

- * - * @param str the String to unescape, may be null + * @param input the String to unescape, may be null * @return a new unescaped String, null if null string input * @see #escapeXml(String) */ - public static String unescapeXml(String str) { - return UnescapeUtils.unescapeXml(str); + public static final String unescapeXml(String input) { + return UNESCAPE_XML.translate(input); } + //----------------------------------------------------------------------- @@ -422,40 +416,13 @@ public static String unescapeXml(String str) { * see Wikipedia and * RFC 4180. * - * @param str the input CSV column String, may be null + * @param input the input CSV column String, may be null * @return the input String, enclosed in double quotes if the value contains a comma, * newline or double quote, null if null string input * @since 2.4 */ - public static String escapeCsv(String str) { - return EscapeUtils.escapeCsv(str); - } - - /** - *

Writes a String value for a CSV column enclosed in double quotes, - * if required.

- * - *

If the value contains a comma, newline or double quote, then the - * String value is written enclosed in double quotes.

- *

- * - *

Any double quote characters in the value are escaped with another double quote.

- * - *

If the value does not contain a comma, newline or double quote, then the - * String value is written unchanged (null values are ignored).

- *

- * - * see Wikipedia and - * RFC 4180. - * - * @param str the input CSV column String, may be null - * @param out Writer to write input string to, enclosed in double quotes if it contains - * a comma, newline or double quote - * @throws IOException if error occurs on underlying Writer - * @since 2.4 - */ - public static void escapeCsv(Writer out, String str) throws IOException { - EscapeUtils.ESCAPE_CSV.translate(str, out); + public static final String escapeCsv(String input) { + return ESCAPE_CSV.translate(input); } /** @@ -475,40 +442,13 @@ public static void escapeCsv(Writer out, String str) throws IOException { * see Wikipedia and * RFC 4180. * - * @param str the input CSV column String, may be null + * @param input the input CSV column String, may be null * @return the input String, with enclosing double quotes removed and embedded double * quotes unescaped, null if null string input * @since 2.4 */ - public static String unescapeCsv(String str) { - return UnescapeUtils.unescapeCsv(str); - } - - /** - *

Returns a String value for an unescaped CSV column.

- * - *

If the value is enclosed in double quotes, and contains a comma, newline - * or double quote, then quotes are removed. - *

- * - *

Any double quote escaped characters (a pair of double quotes) are unescaped - * to just one double quote.

- * - *

If the value is not enclosed in double quotes, or is and does not contain a - * comma, newline or double quote, then the String value is returned unchanged.

- *

- * - * see Wikipedia and - * RFC 4180. - * - * @param str the input CSV column String, may be null - * @param out Writer to write the input String to, with enclosing double quotes - * removed and embedded double quotes unescaped, null if null string input - * @throws IOException if error occurs on underlying Writer - * @since 2.4 - */ - public static void unescapeCsv(Writer out, String str) throws IOException { - UnescapeUtils.UNESCAPE_CSV.translate(str, out); + public static final String unescapeCsv(String input) { + return UNESCAPE_CSV.translate(input); } } diff --git a/src/java/org/apache/commons/lang/text/translate/EscapeUtils.java b/src/java/org/apache/commons/lang/text/translate/EscapeUtils.java deleted file mode 100644 index 579ace1b9..000000000 --- a/src/java/org/apache/commons/lang/text/translate/EscapeUtils.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.lang.text.translate; - -import java.io.IOException; -import java.io.Writer; - -// CsvEscaper -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang.CharUtils; - -/** - * Helper class defining various standard language escape functions. - * - * @author Apache Software Foundation - * @since 3.0 - */ -public class EscapeUtils { - - public static final CharSequenceTranslator ESCAPE_JAVA = - new LookupTranslator( - new String[][] { - {"\"", "\\\""}, - {"\\", "\\\\"}, - }).with( - new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()) - ).with( - UnicodeEscaper.outsideOf(32, 0x7f) - ); - - public static final String escapeJava(String input) { - return ESCAPE_JAVA.translate(input); - } - - public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = - new AggregateTranslator( - new LookupTranslator( - new String[][] { - {"'", "\\'"}, - {"\"", "\\\""}, - {"\\", "\\\\"}, - {"/", "\\/"} - }), - new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), - UnicodeEscaper.outsideOf(32, 0x7f) - ); - - public static final String escapeEcmaScript(String input) { - return ESCAPE_ECMASCRIPT.translate(input); - } - - public static final CharSequenceTranslator ESCAPE_XML = - new AggregateTranslator( - new LookupTranslator(EntityArrays.BASIC_ESCAPE()), - new LookupTranslator(EntityArrays.APOS_ESCAPE()) - ); - - public static final String escapeXml(String input) { - return ESCAPE_XML.translate(input); - } - - public static final CharSequenceTranslator ESCAPE_HTML3 = - new AggregateTranslator( - new LookupTranslator(EntityArrays.BASIC_ESCAPE()), - new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()) - ); - - public static final String escapeHtml3(String input) { - return ESCAPE_HTML3.translate(input); - } - - public static final CharSequenceTranslator ESCAPE_HTML4 = - new AggregateTranslator( - new LookupTranslator(EntityArrays.BASIC_ESCAPE()), - new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), - new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()) - ); - - public static final String escapeHtml4(String input) { - return ESCAPE_HTML4.translate(input); - } - - public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); - - public static final String escapeCsv(String input) { - return ESCAPE_CSV.translate(input); - } - - // TODO: Create a parent class - 'SinglePassTranslator' ? - // TODO: It would handle the index checking, and length returning, and - // TODO: could also have an optimization check method. - static class CsvEscaper extends CharSequenceTranslator { - - private static final char CSV_DELIMITER = ','; - private static final char CSV_QUOTE = '"'; - private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); - private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; - - // TODO: Replace with a RegexTranslator. That should consume the number of characters the regex uses up? - @Override - public int translate(CharSequence input, int index, Writer out) throws IOException { - - if(index != 0) { - throw new IllegalStateException("CsvEscaper should never reach the [1] index"); - } - - if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { - out.write(input.toString()); - } else { - out.write(CSV_QUOTE); - out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); - out.write(CSV_QUOTE); - } - return input.length(); - } - } - -} diff --git a/src/java/org/apache/commons/lang/text/translate/UnescapeUtils.java b/src/java/org/apache/commons/lang/text/translate/UnescapeUtils.java deleted file mode 100644 index 24faba562..000000000 --- a/src/java/org/apache/commons/lang/text/translate/UnescapeUtils.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.lang.text.translate; - -import java.io.IOException; -import java.io.Writer; - -// CsvUnescaper -import org.apache.commons.lang.StringUtils; -import org.apache.commons.lang.CharUtils; - -/** - * Helper class defining various standard language unescape functions. - * - * @author Apache Software Foundation - * @since 3.0 - */ -public class UnescapeUtils { - - // throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)? - public static final CharSequenceTranslator UNESCAPE_JAVA = - new AggregateTranslator( - new UnicodeUnescaper(), - new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), - new LookupTranslator( - new String[][] { - {"\\\\", "\\"}, - {"\\\"", "\""}, - {"\\'", "'"}, - {"\\", ""} - }) - ); - - public static final String unescapeJava(String input) { - return UNESCAPE_JAVA.translate(input); - } - - public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; - - public static final String unescapeEcmaScript(String input) { - return UNESCAPE_ECMASCRIPT.translate(input); - } - - public static final CharSequenceTranslator UNESCAPE_HTML3 = - new AggregateTranslator( - new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), - new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), - new NumericEntityUnescaper() - ); - - public static final String unescapeHtml3(String input) { - return UNESCAPE_HTML3.translate(input); - } - - public static final CharSequenceTranslator UNESCAPE_HTML4 = - new AggregateTranslator( - new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), - new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), - new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), - new NumericEntityUnescaper() - ); - - public static final String unescapeHtml4(String input) { - return UNESCAPE_HTML4.translate(input); - } - - public static final CharSequenceTranslator UNESCAPE_XML = - new AggregateTranslator( - new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), - new LookupTranslator(EntityArrays.APOS_UNESCAPE()), - new NumericEntityUnescaper() - ); - - public static final String unescapeXml(String input) { - return UNESCAPE_XML.translate(input); - } - - public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); - - public static final String unescapeCsv(String input) { - return UNESCAPE_CSV.translate(input); - } - - static class CsvUnescaper extends CharSequenceTranslator { - - private static final char CSV_DELIMITER = ','; - private static final char CSV_QUOTE = '"'; - private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); - private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; - - // TODO: Replace with a RegexTranslator. That should consume the number of characters the regex uses up? - @Override - public int translate(CharSequence input, int index, Writer out) throws IOException { - - if(index != 0) { - throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); - } - - if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) { - out.write(input.toString()); - return input.length(); - } - - // strip quotes - String quoteless = input.subSequence(1, input.length() - 1).toString(); - - if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { - // deal with escaped quotes; ie) "" - out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); - } else { - out.write(input.toString()); - } - return input.length(); - } - } - -} diff --git a/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java b/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java index 6a3111641..fd56b466d 100644 --- a/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java +++ b/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java @@ -63,14 +63,14 @@ public void testConstructor() { public void testEscapeJava() throws IOException { assertEquals(null, StringEscapeUtils.escapeJava(null)); try { - StringEscapeUtils.escapeJava(null, null); + StringEscapeUtils.ESCAPE_JAVA.translate(null, null); fail(); } catch (IOException ex) { fail(); } catch (IllegalArgumentException ex) { } try { - StringEscapeUtils.escapeJava(null, ""); + StringEscapeUtils.ESCAPE_JAVA.translate("", null); fail(); } catch (IOException ex) { fail(); @@ -123,21 +123,21 @@ private void assertEscapeJava(String message, String expected, String original) assertEquals(message, expected, converted); StringWriter writer = new StringWriter(); - StringEscapeUtils.escapeJava(writer, original); + StringEscapeUtils.ESCAPE_JAVA.translate(original, writer); assertEquals(expected, writer.toString()); } public void testUnescapeJava() throws IOException { assertEquals(null, StringEscapeUtils.unescapeJava(null)); try { - StringEscapeUtils.unescapeJava(null, null); + StringEscapeUtils.UNESCAPE_JAVA.translate(null, null); fail(); } catch (IOException ex) { fail(); } catch (IllegalArgumentException ex) { } try { - StringEscapeUtils.unescapeJava(null, ""); + StringEscapeUtils.UNESCAPE_JAVA.translate("", null); fail(); } catch (IOException ex) { fail(); @@ -177,31 +177,31 @@ private void assertUnescapeJava(String message, String unescaped, String origina expected, actual); StringWriter writer = new StringWriter(); - StringEscapeUtils.unescapeJava(writer, original); + StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer); assertEquals(unescaped, writer.toString()); } - public void testEscapeJavaScript() { - assertEquals(null, StringEscapeUtils.escapeJavaScript(null)); + public void testEscapeEcmaScript() { + assertEquals(null, StringEscapeUtils.escapeEcmaScript(null)); try { - StringEscapeUtils.escapeJavaScript(null, null); + StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null); fail(); } catch (IOException ex) { fail(); } catch (IllegalArgumentException ex) { } try { - StringEscapeUtils.escapeJavaScript(null, ""); + StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null); fail(); } catch (IOException ex) { fail(); } catch (IllegalArgumentException ex) { } - assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeJavaScript("He didn't say, \"stop!\"")); + assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\"")); assertEquals("document.getElementById(\\\"test\\\").value = \\'';")); + StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '';")); } @@ -227,10 +227,10 @@ public void testEscapeHtml() { String message = htmlEscapes[i][0]; String expected = htmlEscapes[i][1]; String original = htmlEscapes[i][2]; - assertEquals(message, expected, StringEscapeUtils.escapeHtml(original)); + assertEquals(message, expected, StringEscapeUtils.escapeHtml4(original)); StringWriter sw = new StringWriter(); try { - StringEscapeUtils.escapeHtml(sw, original); + StringEscapeUtils.ESCAPE_HTML4.translate(original, sw); } catch (IOException e) { } String actual = original == null ? null : sw.toString(); @@ -238,16 +238,16 @@ public void testEscapeHtml() { } } - public void testUnescapeHtml() { + public void testUnescapeHtml4() { for (int i = 0; i < htmlEscapes.length; ++i) { String message = htmlEscapes[i][0]; String expected = htmlEscapes[i][2]; String original = htmlEscapes[i][1]; - assertEquals(message, expected, StringEscapeUtils.unescapeHtml(original)); + assertEquals(message, expected, StringEscapeUtils.unescapeHtml4(original)); StringWriter sw = new StringWriter(); try { - StringEscapeUtils.unescapeHtml(sw, original); + StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw); } catch (IOException e) { } String actual = original == null ? null : sw.toString(); @@ -256,18 +256,18 @@ public void testUnescapeHtml() { // \u00E7 is a cedilla (c with wiggle under) // note that the test string must be 7-bit-clean (unicode escaped) or else it will compile incorrectly // on some locales - assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml("Fran\u00E7ais")); + assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais")); - assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml("Hello&;World")); - assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml("Hello&#;World")); - assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml("Hello&# ;World")); - assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml("Hello&##;World")); + assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World")); + assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World")); + assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World")); + assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World")); } public void testUnescapeHexCharsHtml() { // Simple easy to grok test - assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml("€Ÿ")); - assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml("€Ÿ")); + assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("€Ÿ")); + assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("€Ÿ")); // Test all Character values: for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) { Character c1 = new Character(i); @@ -275,19 +275,19 @@ public void testUnescapeHexCharsHtml() { String expected = c1.toString() + c2.toString(); String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";"; String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";"; - assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml(escapedC1 + escapedC2)); + assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2)); } } public void testUnescapeUnknownEntity() throws Exception { - assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml("&zzzz;")); + assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;")); } public void testEscapeHtmlVersions() throws Exception { - assertEquals("Β", StringEscapeUtils.escapeHtml("\u0392")); - assertEquals("\u0392", StringEscapeUtils.unescapeHtml("Β")); + assertEquals("Β", StringEscapeUtils.escapeHtml4("\u0392")); + assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("Β")); //todo: refine API for escaping/unescaping specific HTML versions @@ -310,14 +310,14 @@ public void testEscapeXml() throws Exception { StringWriter sw = new StringWriter(); try { - StringEscapeUtils.escapeXml(sw, ""); + StringEscapeUtils.ESCAPE_XML.translate("", sw); } catch (IOException e) { } assertEquals("XML was escaped incorrectly", "<abc>", sw.toString() ); sw = new StringWriter(); try { - StringEscapeUtils.unescapeXml(sw, "<abc>"); + StringEscapeUtils.UNESCAPE_XML.translate("<abc>", sw); } catch (IOException e) { } assertEquals("XML was unescaped incorrectly", "", sw.toString() ); @@ -326,14 +326,14 @@ public void testEscapeXml() throws Exception { // Tests issue #38569 // http://issues.apache.org/bugzilla/show_bug.cgi?id=38569 public void testStandaloneAmphersand() { - assertEquals("", StringEscapeUtils.unescapeHtml("<P&O>")); - assertEquals("test & <", StringEscapeUtils.unescapeHtml("test & <")); + assertEquals("", StringEscapeUtils.unescapeHtml4("<P&O>")); + assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & <")); assertEquals("", StringEscapeUtils.unescapeXml("<P&O>")); assertEquals("test & <", StringEscapeUtils.unescapeXml("test & <")); } public void testLang313() { - assertEquals("& &", StringEscapeUtils.unescapeHtml("& &")); + assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &")); } public void testEscapeCsvString() throws Exception @@ -361,7 +361,7 @@ public void testEscapeCsvWriter() throws Exception private void checkCsvEscapeWriter(String expected, String value) { try { StringWriter writer = new StringWriter(); - StringEscapeUtils.escapeCsv(writer, value); + StringEscapeUtils.ESCAPE_CSV.translate(value, writer); assertEquals(expected, writer.toString()); } catch (IOException e) { fail("Threw: " + e); @@ -397,7 +397,7 @@ public void testUnescapeCsvWriter() throws Exception private void checkCsvUnescapeWriter(String expected, String value) { try { StringWriter writer = new StringWriter(); - StringEscapeUtils.unescapeCsv(writer, value); + StringEscapeUtils.UNESCAPE_CSV.translate(value, writer); assertEquals(expected, writer.toString()); } catch (IOException e) { fail("Threw: " + e); @@ -414,14 +414,14 @@ public void testEscapeHtmlHighUnicode() throws java.io.UnsupportedEncodingExcept String original = new String(data, "UTF8"); - String escaped = StringEscapeUtils.escapeHtml( original ); + String escaped = StringEscapeUtils.escapeHtml4( original ); assertEquals( "High unicode should not have been escaped", original, escaped); - String unescaped = StringEscapeUtils.unescapeHtml( escaped ); + String unescaped = StringEscapeUtils.unescapeHtml4( escaped ); assertEquals( "High unicode should have been unchanged", original, unescaped); // TODO: I think this should hold, needs further investigation -// String unescapedFromEntity = StringEscapeUtils.unescapeHtml( "𝍢" ); +// String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "𝍢" ); // assertEquals( "High unicode should have been unescaped", original, unescapedFromEntity); } @@ -429,11 +429,11 @@ public void testEscapeHtmlHighUnicode() throws java.io.UnsupportedEncodingExcept public void testEscapeHiragana() throws java.io.UnsupportedEncodingException { // Some random Japanese unicode characters String original = "\u304B\u304C\u3068"; - String escaped = StringEscapeUtils.escapeHtml(original); - assertEquals( "Hiragana character unicode behaviour should not be being escaped by escapeHtml", + String escaped = StringEscapeUtils.escapeHtml4(original); + assertEquals( "Hiragana character unicode behaviour should not be being escaped by escapeHtml4", original, escaped); - String unescaped = StringEscapeUtils.unescapeHtml( escaped ); + String unescaped = StringEscapeUtils.unescapeHtml4( escaped ); assertEquals( "Hiragana character unicode behaviour has changed - expected no unescaping", escaped, unescaped); } diff --git a/src/test/org/apache/commons/lang/text/translate/EscapeUtilsTest.java b/src/test/org/apache/commons/lang/text/translate/EscapeUtilsTest.java deleted file mode 100644 index 27d03ccfa..000000000 --- a/src/test/org/apache/commons/lang/text/translate/EscapeUtilsTest.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.lang.text.translate; - -import junit.framework.TestCase; - -/** - * Unit tests for {@link org.apache.commons.lang.text.translate.EscapeUtils}. - */ -public class EscapeUtilsTest extends TestCase { - - public void testConstructorExists() { - new EscapeUtils(); - } -} diff --git a/src/test/org/apache/commons/lang/text/translate/UnescapeUtilsTest.java b/src/test/org/apache/commons/lang/text/translate/UnescapeUtilsTest.java deleted file mode 100644 index fc090fe27..000000000 --- a/src/test/org/apache/commons/lang/text/translate/UnescapeUtilsTest.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.lang.text.translate; - -import junit.framework.TestCase; - -/** - * Unit tests for {@link org.apache.commons.lang.text.translate.UnescapeUtils}. - */ -public class UnescapeUtilsTest extends TestCase { - - public void testConstructorExists() { - new UnescapeUtils(); - } -}