diff --git a/src/java/org/apache/commons/lang/StringEscapeUtils.java b/src/java/org/apache/commons/lang/StringEscapeUtils.java index 95aef04e8..10ec150f5 100644 --- a/src/java/org/apache/commons/lang/StringEscapeUtils.java +++ b/src/java/org/apache/commons/lang/StringEscapeUtils.java @@ -19,8 +19,11 @@ import java.io.IOException; import java.io.Writer; -import org.apache.commons.lang.text.translate.EscapeUtils; -import org.apache.commons.lang.text.translate.UnescapeUtils; +import org.apache.commons.lang.text.translate.*; + +// CsvTranslators +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.CharUtils; /** *
Escapes and unescapes String
s for
@@ -38,12 +41,162 @@
* @author Pete Gieser
* @since 2.0
* @version $Id$
- *
- * @deprecated Use text.translate.EscapeUtils and text.translate.UnescapeUtils instead
*/
-@Deprecated
public class StringEscapeUtils {
+ /* ESCAPE TRANSLATORS */
+
+ /**
+ * Translator used by {@code escapeJava(String)}.
+ *
+ * It chains three translators with {@code with(...)}: a lookup that maps a double
+ * quote to backslash + double quote and a backslash to two backslashes, a lookup
+ * built from {@code EntityArrays.JAVA_CTRL_CHARS_ESCAPE()}, and
+ * {@code UnicodeEscaper.outsideOf(32, 0x7f)}.
+ *
+ * NOTE(review): presumably the chained translators are tried in the order listed and
+ * the last one escapes characters outside the 32..0x7f range - confirm against
+ * CharSequenceTranslator.with and UnicodeEscaper.
+ */
+ public static final CharSequenceTranslator ESCAPE_JAVA =
+ new LookupTranslator(
+ new String[][] {
+ {"\"", "\\\""},
+ {"\\", "\\\\"},
+ }).with(
+ new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
+ ).with(
+ UnicodeEscaper.outsideOf(32, 0x7f)
+ );
+
+ /**
+ * Translator used by {@code escapeEcmaScript(String)}.
+ *
+ * Aggregates a lookup that prefixes single quote, double quote, backslash and
+ * forward slash with a backslash, a lookup built from
+ * {@code EntityArrays.JAVA_CTRL_CHARS_ESCAPE()}, and
+ * {@code UnicodeEscaper.outsideOf(32, 0x7f)}.
+ *
+ * NOTE(review): presumably AggregateTranslator tries its members in the order
+ * given - confirm against AggregateTranslator.
+ */
+ public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
+ new AggregateTranslator(
+ new LookupTranslator(
+ new String[][] {
+ {"'", "\\'"},
+ {"\"", "\\\""},
+ {"\\", "\\\\"},
+ {"/", "\\/"}
+ }),
+ new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
+ UnicodeEscaper.outsideOf(32, 0x7f)
+ );
+
+ /**
+ * Translator used by {@code escapeXml(String)}.
+ *
+ * Aggregates lookups built from {@code EntityArrays.BASIC_ESCAPE()} and
+ * {@code EntityArrays.APOS_ESCAPE()}.
+ *
+ * NOTE(review): presumably these two tables together cover the five predefined
+ * XML entities - confirm against EntityArrays.
+ */
+ public static final CharSequenceTranslator ESCAPE_XML =
+ new AggregateTranslator(
+ new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
+ new LookupTranslator(EntityArrays.APOS_ESCAPE())
+ );
+
+ /**
+ * Translator used by {@code escapeHtml3(String)}.
+ *
+ * Aggregates lookups built from {@code EntityArrays.BASIC_ESCAPE()} and
+ * {@code EntityArrays.ISO8859_1_ESCAPE()}; unlike {@code ESCAPE_HTML4} it does not
+ * include the {@code HTML40_EXTENDED_ESCAPE} table.
+ */
+ public static final CharSequenceTranslator ESCAPE_HTML3 =
+ new AggregateTranslator(
+ new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
+ new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
+ );
+
+ /**
+ * Translator used by {@code escapeHtml4(String)}.
+ *
+ * Aggregates lookups built from {@code EntityArrays.BASIC_ESCAPE()},
+ * {@code EntityArrays.ISO8859_1_ESCAPE()} and
+ * {@code EntityArrays.HTML40_EXTENDED_ESCAPE()}.
+ */
+ public static final CharSequenceTranslator ESCAPE_HTML4 =
+ new AggregateTranslator(
+ new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
+ new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
+ new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
+ );
+
+ /** Translator used by {@code escapeCsv(String)}; a single shared {@code CsvEscaper} instance. */
+ public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
+
+ // TODO: Create a parent class - 'SinglePassTranslator' ?
+ // TODO: It would handle the index checking, and length returning, and
+ // TODO: could also have an optimization check method.
+    /**
+     * Single-pass translator that escapes a whole value for use as one CSV column.
+     *
+     * The value is written unchanged unless it contains a comma, a double quote, a CR
+     * or an LF; in that case it is written enclosed in double quotes with every
+     * embedded double quote doubled.
+     */
+    static class CsvEscaper extends CharSequenceTranslator {
+
+        private static final char CSV_DELIMITER = ',';
+        private static final char CSV_QUOTE = '"';
+        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
+        private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
+
+        // TODO: Replace with a RegexTranslator. That should consume the number of characters the regex uses up?
+        /**
+         * Escapes the complete input in one call.
+         *
+         * @param input the whole column value
+         * @param index must be 0, since the entire input is consumed by the first call
+         * @param out the Writer receiving the escaped value
+         * @return the length of the input, i.e. everything was consumed
+         * @throws IllegalStateException if index is not 0
+         * @throws IOException if the Writer fails
+         */
+        @Override
+        public int translate(CharSequence input, int index, Writer out) throws IOException {
+            if (index != 0) {
+                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
+            }
+
+            final String field = input.toString();
+            final boolean needsQuoting = !StringUtils.containsNone(field, CSV_SEARCH_CHARS);
+
+            if (needsQuoting) {
+                final String doubledQuote = CSV_QUOTE_STR + CSV_QUOTE_STR;
+                out.write(CSV_QUOTE);
+                out.write(StringUtils.replace(field, CSV_QUOTE_STR, doubledQuote));
+                out.write(CSV_QUOTE);
+            } else {
+                out.write(field);
+            }
+            return input.length();
+        }
+    }
+
+ /* UNESCAPE TRANSLATORS */
+
+ // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
+ /**
+ * Translator used by {@code unescapeJava(String)} and, through
+ * {@code UNESCAPE_ECMASCRIPT}, by {@code unescapeEcmaScript(String)}.
+ *
+ * Aggregates a {@code UnicodeUnescaper}, a lookup built from
+ * {@code EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()}, and a lookup that turns an
+ * escaped backslash, double quote or single quote back into the bare character.
+ */
+ public static final CharSequenceTranslator UNESCAPE_JAVA =
+ new AggregateTranslator(
+ new UnicodeUnescaper(),
+ new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
+ new LookupTranslator(
+ new String[][] {
+ {"\\\\", "\\"},
+ {"\\\"", "\""},
+ {"\\'", "'"},
+ // A lone backslash maps to the empty string, i.e. it is dropped.
+ // NOTE(review): presumably this entry only applies when no longer entry matches - confirm LookupTranslator's matching rule.
+ {"\\", ""}
+ })
+ );
+
+ /** Translator used by {@code unescapeEcmaScript(String)}; the same instance as {@code UNESCAPE_JAVA}. */
+ public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
+
+ /**
+ * Translator used by {@code unescapeHtml3(String)}.
+ *
+ * Aggregates lookups built from {@code EntityArrays.BASIC_UNESCAPE()} and
+ * {@code EntityArrays.ISO8859_1_UNESCAPE()}, followed by a
+ * {@code NumericEntityUnescaper}.
+ */
+ public static final CharSequenceTranslator UNESCAPE_HTML3 =
+ new AggregateTranslator(
+ new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
+ new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
+ new NumericEntityUnescaper()
+ );
+
+ /**
+ * Translator used by {@code unescapeHtml4(String)}.
+ *
+ * Aggregates lookups built from {@code EntityArrays.BASIC_UNESCAPE()},
+ * {@code EntityArrays.ISO8859_1_UNESCAPE()} and
+ * {@code EntityArrays.HTML40_EXTENDED_UNESCAPE()}, followed by a
+ * {@code NumericEntityUnescaper}.
+ */
+ public static final CharSequenceTranslator UNESCAPE_HTML4 =
+ new AggregateTranslator(
+ new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
+ new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
+ new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
+ new NumericEntityUnescaper()
+ );
+
+ /**
+ * Translator used by {@code unescapeXml(String)}.
+ *
+ * Aggregates lookups built from {@code EntityArrays.BASIC_UNESCAPE()} and
+ * {@code EntityArrays.APOS_UNESCAPE()}, followed by a
+ * {@code NumericEntityUnescaper}.
+ */
+ public static final CharSequenceTranslator UNESCAPE_XML =
+ new AggregateTranslator(
+ new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
+ new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
+ new NumericEntityUnescaper()
+ );
+
+ /** Translator used by {@code unescapeCsv(String)}; a single shared {@code CsvUnescaper} instance. */
+ public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
+
+    /**
+     * Single-pass translator that reverses CSV column escaping for a whole value.
+     *
+     * A value that is not enclosed in double quotes is written unchanged. A value
+     * that is enclosed in double quotes has the enclosing quotes removed and each
+     * doubled double quote collapsed to one, but only when the text between the
+     * quotes contains a comma, a double quote, a CR or an LF; otherwise the value
+     * is written unchanged, quotes included.
+     */
+    static class CsvUnescaper extends CharSequenceTranslator {
+
+        private static final char CSV_DELIMITER = ',';
+        private static final char CSV_QUOTE = '"';
+        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
+        private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
+
+        // TODO: Replace with a RegexTranslator. That should consume the number of characters the regex uses up?
+        /**
+         * Unescapes the complete input in one call.
+         *
+         * @param input the whole column value
+         * @param index must be 0, since the entire input is consumed by the first call
+         * @param out the Writer receiving the unescaped value
+         * @return the length of the input, i.e. everything was consumed
+         * @throws IllegalStateException if index is not 0
+         * @throws IOException if the Writer fails
+         */
+        @Override
+        public int translate(CharSequence input, int index, Writer out) throws IOException {
+
+            if (index != 0) {
+                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
+            }
+
+            // Fewer than two characters cannot form an enclosing quote pair. Without this
+            // guard a lone '"' satisfies both charAt checks (first == last) and the
+            // subSequence(1, 0) call below throws; empty input would fail on charAt(0).
+            if (input.length() < 2
+                    || input.charAt(0) != CSV_QUOTE
+                    || input.charAt(input.length() - 1) != CSV_QUOTE) {
+                out.write(input.toString());
+                return input.length();
+            }
+
+            // strip quotes
+            String quoteless = input.subSequence(1, input.length() - 1).toString();
+
+            if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
+                // deal with escaped quotes; ie) ""
+                out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
+            } else {
+                // The quotes were not required by the content, so keep the value as given.
+                out.write(input.toString());
+            }
+            return input.length();
+        }
+    }
+
+ /* Helper functions */
+
/**
*
StringEscapeUtils
instances should NOT be constructed in
* standard programming.
null
if null string input
*/
- public static String escapeJava(String str) {
- return EscapeUtils.escapeJava(str);
+ public static final String escapeJava(String input) {
+ return ESCAPE_JAVA.translate(input);
}
/**
- * Escapes the characters in a String
using Java String rules to
- * a Writer
.
A null
string input has no effect.
null
- * @throws IOException if error occurs on underlying Writer
- */
- public static void escapeJava(Writer out, String str) throws IOException {
- EscapeUtils.ESCAPE_JAVA.translate(str, out);
- }
-
- /**
- * Escapes the characters in a String
using JavaScript String rules.
Escapes any values it finds into their JavaScript String form. + *
Escapes the characters in a String
using EcmaScript String rules.
Escapes any values it finds into their EcmaScript String form. * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)
* *So a tab becomes the characters '\\'
and
* 't'
.
The only difference between Java strings and JavaScript strings - * is that in JavaScript, a single quote and forward-slash (/) are escaped.
+ *The only difference between Java strings and EcmaScript strings + * is that in EcmaScript, a single quote and forward-slash (/) are escaped.
+ * + *Note that EcmaScript is best known by the JavaScript and ActionScript dialects.
* *Example: *
@@ -119,27 +258,11 @@ public static void escapeJava(Writer out, String str) throws IOException { ** * - * @param str String to escape values in, may be null + * @param input String to escape values in, may be null * @return String with escaped values,
null
if null string input
*/
- public static String escapeJavaScript(String str) {
- return EscapeUtils.escapeEcmaScript(str);
- }
-
- /**
- * Escapes the characters in a String
using JavaScript String rules
- * to a Writer
.
A null
string input has no effect.
null
- * @throws IOException if error occurs on underlying Writer
- **/
- public static void escapeJavaScript(Writer out, String str) throws IOException {
- EscapeUtils.ESCAPE_ECMASCRIPT.translate(str, out);
+ public static final String escapeEcmaScript(String input) {
+ return ESCAPE_ECMASCRIPT.translate(input);
}
/**
@@ -148,65 +271,26 @@ public static void escapeJavaScript(Writer out, String str) throws IOException {
* 'n'
into a newline character, unless the '\'
* is preceded by another '\'
.
*
- * @param str the String
to unescape, may be null
+ * @param input the String
to unescape, may be null
* @return a new unescaped String
, null
if null string input
*/
- public static String unescapeJava(String str) {
- return UnescapeUtils.unescapeJava(str);
+ public static final String unescapeJava(String input) {
+ return UNESCAPE_JAVA.translate(input);
}
/**
- * Unescapes any Java literals found in the String
to a
- * Writer
.
For example, it will turn a sequence of '\'
and
- * 'n'
into a newline character, unless the '\'
- * is preceded by another '\'
.
A null
string input has no effect.
Writer
used to output unescaped characters
- * @param str the String
to unescape, may be null
- * @throws IllegalArgumentException if the Writer is null
- * @throws IOException if error occurs on underlying Writer
- */
- public static void unescapeJava(Writer out, String str) throws IOException {
- UnescapeUtils.UNESCAPE_JAVA.translate(str, out);
- }
-
- /**
- * Unescapes any JavaScript literals found in the String
.
Unescapes any EcmaScript literals found in the String
.
For example, it will turn a sequence of '\'
and 'n'
* into a newline character, unless the '\'
is preceded by another
* '\'
.
String
to unescape, may be null
+ * @param input the String
to unescape, may be null
* @return A new unescaped String
, null
if null string input
*/
- public static String unescapeJavaScript(String str) {
- return UnescapeUtils.unescapeEcmaScript(str);
- }
-
- /**
- * Unescapes any JavaScript literals found in the String
to a
- * Writer
.
For example, it will turn a sequence of '\'
and 'n'
- * into a newline character, unless the '\'
is preceded by another
- * '\'
.
A null
string input has no effect.
Writer
used to output unescaped characters
- * @param str the String
to unescape, may be null
- * @throws IllegalArgumentException if the Writer is null
- * @throws IOException if error occurs on underlying Writer
- */
- public static void unescapeJavaScript(Writer out, String str) throws IOException {
- UnescapeUtils.UNESCAPE_ECMASCRIPT.translate(str, out);
+ public static final String unescapeEcmaScript(String input) {
+ return UNESCAPE_ECMASCRIPT.translate(input);
}
// HTML and XML
@@ -227,7 +311,7 @@ public static void unescapeJavaScript(Writer out, String str) throws IOException
* Note that the commonly used apostrophe escape character (&amp;apos;)
* is not a legal entity and so is not supported.
*
- * @param str the String
to escape, may be null
+ * @param input the String
to escape, may be null
* @return a new escaped String
, null
if null string input
*
* @see #unescapeHtml(String)
@@ -237,43 +321,14 @@ public static void unescapeJavaScript(Writer out, String str) throws IOException
* @see HTML 4.01 Character References
* @see HTML 4.01 Code positions
*/
- public static String escapeHtml(String str) {
- return EscapeUtils.escapeHtml4(str);
+ public static final String escapeHtml4(String input) {
+ return ESCAPE_HTML4.translate(input);
}
- /**
- * Escapes the characters in a String
using HTML entities and writes
- * them to a Writer
.
- * For example: - *
- *"bread" & "butter"
- * becomes:
- *"bread" & "butter"
.
- *
- * Supports all known HTML 4.0 entities, including funky accents. - * Note that the commonly used apostrophe escape character (') - * is not a legal entity and so is not supported).
- * - * @param writer the writer receiving the escaped string, not null - * @param string theString
to escape, may be null
- * @throws IllegalArgumentException if the writer is null
- * @throws IOException when Writer
passed throws the exception from
- * calls to the {@link Writer#write(int)} methods.
- *
- * @see #escapeHtml(String)
- * @see #unescapeHtml(String)
- * @see ISO Entities
- * @see HTML 3.2 Character Entities for ISO Latin-1
- * @see HTML 4.0 Character entity references
- * @see HTML 4.01 Character References
- * @see HTML 4.01 Code positions
- */
- public static void escapeHtml(Writer writer, String string) throws IOException {
- EscapeUtils.ESCAPE_HTML4.translate(string, writer);
+ /**
+ * Escapes the characters in a String by passing it through {@code ESCAPE_HTML3}.
+ *
+ * NOTE(review): the sibling escape methods document a null return for null
+ * input - confirm the same holds for this translator.
+ *
+ * @param input the String to escape
+ * @return the result of {@code ESCAPE_HTML3.translate(input)}
+ */
+ public static final String escapeHtml3(String input) {
+ return ESCAPE_HTML3.translate(input);
}
-
+
//-----------------------------------------------------------------------
/**
* Unescapes a string containing entity escapes to a string @@ -287,34 +342,15 @@ public static void escapeHtml(Writer writer, String string) throws IOException { * verbatim into the result string. e.g. ">&zzzz;x" will * become ">&zzzz;x".
* - * @param str theString
to unescape, may be null
+ * @param input the String
to unescape, may be null
* @return a new unescaped String
, null
if null string input
- * @see #escapeHtml(Writer, String)
*/
- public static String unescapeHtml(String str) {
- return UnescapeUtils.unescapeHtml4(str);
+ public static final String unescapeHtml4(String input) {
+ return UNESCAPE_HTML4.translate(input);
}
- /**
- * Unescapes a string containing entity escapes to a string - * containing the actual Unicode characters corresponding to the - * escapes. Supports HTML 4.0 entities.
- * - *For example, the string "<Français>" - * will become "<Français>"
- * - *If an entity is unrecognized, it is left alone, and inserted - * verbatim into the result string. e.g. ">&zzzz;x" will - * become ">&zzzz;x".
- * - * @param writer the writer receiving the unescaped string, not null - * @param string theString
to unescape, may be null
- * @throws IllegalArgumentException if the writer is null
- * @throws IOException if an IOException occurs
- * @see #escapeHtml(String)
- */
- public static void unescapeHtml(Writer writer, String string) throws IOException {
- UnescapeUtils.UNESCAPE_HTML4.translate(string, writer);
+ /**
+ * Unescapes the entity escapes in a String by passing it through
+ * {@code UNESCAPE_HTML3}.
+ *
+ * NOTE(review): the sibling unescape methods document a null return for null
+ * input - confirm the same holds for this translator.
+ *
+ * @param input the String to unescape
+ * @return the result of {@code UNESCAPE_HTML3.translate(input)}
+ */
+ public static final String unescapeHtml3(String input) {
+ return UNESCAPE_HTML3.translate(input);
}
//-----------------------------------------------------------------------
@@ -331,36 +367,14 @@ public static void unescapeHtml(Writer writer, String string) throws IOException
* Note that unicode characters greater than 0x7f are as of 3.0, no longer * escaped.
* - * @param writer the writer receiving the unescaped string, not null - * @param str theString
to escape, may be null
- * @throws IllegalArgumentException if the writer is null
- * @throws IOException if there is a problem writing
- * @see #unescapeXml(java.lang.String)
- */
- public static void escapeXml(Writer writer, String str) throws IOException {
- EscapeUtils.ESCAPE_XML.translate(str, writer);
- }
-
- /**
- * Escapes the characters in a String
using XML entities.
For example: "bread" & "butter" => - * "bread" & "butter". - *
- * - *Supports only the five basic XML entities (gt, lt, quot, amp, apos). - * Does not support DTDs or external entities.
- * - *Note that unicode characters greater than 0x7f are as of 3.0, no longer - * escaped.
- * - * @param str theString
to escape, may be null
+ * @param input the String
to escape, may be null
* @return a new escaped String
, null
if null string input
* @see #unescapeXml(java.lang.String)
*/
- public static String escapeXml(String str) {
- return EscapeUtils.escapeXml(str);
+ public static final String escapeXml(String input) {
+ return ESCAPE_XML.translate(input);
}
+
//-----------------------------------------------------------------------
/**
@@ -374,34 +388,14 @@ public static String escapeXml(String str) {
* Note that numerical \\u unicode codes are unescaped to their respective * unicode characters. This may change in future releases.
* - * @param writer the writer receiving the unescaped string, not null - * @param str theString
to unescape, may be null
- * @throws IllegalArgumentException if the writer is null
- * @throws IOException if there is a problem writing
- * @see #escapeXml(String)
- */
- public static void unescapeXml(Writer writer, String str) throws IOException {
- UnescapeUtils.UNESCAPE_XML.translate(str, writer);
- }
-
- /**
- * Unescapes a string containing XML entity escapes to a string - * containing the actual Unicode characters corresponding to the - * escapes.
- * - *Supports only the five basic XML entities (gt, lt, quot, amp, apos). - * Does not support DTDs or external entities.
- * - *Note that numerical \\u unicode codes are unescaped to their respective - * unicode characters. This may change in future releases.
- * - * @param str theString
to unescape, may be null
+ * @param input the String
to unescape, may be null
* @return a new unescaped String
, null
if null string input
* @see #escapeXml(String)
*/
- public static String unescapeXml(String str) {
- return UnescapeUtils.unescapeXml(str);
+ public static final String unescapeXml(String input) {
+ return UNESCAPE_XML.translate(input);
}
+
//-----------------------------------------------------------------------
@@ -422,40 +416,13 @@ public static String unescapeXml(String str) {
* see Wikipedia and
* RFC 4180.
*
- * @param str the input CSV column String, may be null
+ * @param input the input CSV column String, may be null
* @return the input String, enclosed in double quotes if the value contains a comma,
* newline or double quote, null
if null string input
* @since 2.4
*/
- public static String escapeCsv(String str) {
- return EscapeUtils.escapeCsv(str);
- }
-
- /**
- * Writes a String
value for a CSV column enclosed in double quotes,
- * if required.
If the value contains a comma, newline or double quote, then the - * String value is written enclosed in double quotes.
- * - * - *Any double quote characters in the value are escaped with another double quote.
- * - *If the value does not contain a comma, newline or double quote, then the - * String value is written unchanged (null values are ignored).
- * - * - * see Wikipedia and - * RFC 4180. - * - * @param str the input CSV column String, may be null - * @param out Writer to write input string to, enclosed in double quotes if it contains - * a comma, newline or double quote - * @throws IOException if error occurs on underlying Writer - * @since 2.4 - */ - public static void escapeCsv(Writer out, String str) throws IOException { - EscapeUtils.ESCAPE_CSV.translate(str, out); + public static final String escapeCsv(String input) { + return ESCAPE_CSV.translate(input); } /** @@ -475,40 +442,13 @@ public static void escapeCsv(Writer out, String str) throws IOException { * see Wikipedia and * RFC 4180. * - * @param str the input CSV column String, may be null + * @param input the input CSV column String, may be null * @return the input String, with enclosing double quotes removed and embedded double * quotes unescaped,null
if null string input
* @since 2.4
*/
- public static String unescapeCsv(String str) {
- return UnescapeUtils.unescapeCsv(str);
- }
-
- /**
- * Returns a String
value for an unescaped CSV column.
If the value is enclosed in double quotes, and contains a comma, newline - * or double quote, then quotes are removed. - *
- * - *Any double quote escaped characters (a pair of double quotes) are unescaped - * to just one double quote.
- * - *If the value is not enclosed in double quotes, or is and does not contain a - * comma, newline or double quote, then the String value is returned unchanged.
- * - * - * see Wikipedia and - * RFC 4180. - * - * @param str the input CSV column String, may be null - * @param out Writer to write the input String to, with enclosing double quotes - * removed and embedded double quotes unescaped,null
if null string input
- * @throws IOException if error occurs on underlying Writer
- * @since 2.4
- */
- public static void unescapeCsv(Writer out, String str) throws IOException {
- UnescapeUtils.UNESCAPE_CSV.translate(str, out);
+ public static final String unescapeCsv(String input) {
+ return UNESCAPE_CSV.translate(input);
}
}
diff --git a/src/java/org/apache/commons/lang/text/translate/EscapeUtils.java b/src/java/org/apache/commons/lang/text/translate/EscapeUtils.java
deleted file mode 100644
index 579ace1b9..000000000
--- a/src/java/org/apache/commons/lang/text/translate/EscapeUtils.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.lang.text.translate;
-
-import java.io.IOException;
-import java.io.Writer;
-
-// CsvEscaper
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang.CharUtils;
-
-/**
- * Helper class defining various standard language escape functions.
- *
- * @author Apache Software Foundation
- * @since 3.0
- */
-public class EscapeUtils {
-
- public static final CharSequenceTranslator ESCAPE_JAVA =
- new LookupTranslator(
- new String[][] {
- {"\"", "\\\""},
- {"\\", "\\\\"},
- }).with(
- new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
- ).with(
- UnicodeEscaper.outsideOf(32, 0x7f)
- );
-
- public static final String escapeJava(String input) {
- return ESCAPE_JAVA.translate(input);
- }
-
- public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
- new AggregateTranslator(
- new LookupTranslator(
- new String[][] {
- {"'", "\\'"},
- {"\"", "\\\""},
- {"\\", "\\\\"},
- {"/", "\\/"}
- }),
- new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
- UnicodeEscaper.outsideOf(32, 0x7f)
- );
-
- public static final String escapeEcmaScript(String input) {
- return ESCAPE_ECMASCRIPT.translate(input);
- }
-
- public static final CharSequenceTranslator ESCAPE_XML =
- new AggregateTranslator(
- new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
- new LookupTranslator(EntityArrays.APOS_ESCAPE())
- );
-
- public static final String escapeXml(String input) {
- return ESCAPE_XML.translate(input);
- }
-
- public static final CharSequenceTranslator ESCAPE_HTML3 =
- new AggregateTranslator(
- new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
- new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
- );
-
- public static final String escapeHtml3(String input) {
- return ESCAPE_HTML3.translate(input);
- }
-
- public static final CharSequenceTranslator ESCAPE_HTML4 =
- new AggregateTranslator(
- new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
- new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
- new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
- );
-
- public static final String escapeHtml4(String input) {
- return ESCAPE_HTML4.translate(input);
- }
-
- public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
-
- public static final String escapeCsv(String input) {
- return ESCAPE_CSV.translate(input);
- }
-
- // TODO: Create a parent class - 'SinglePassTranslator' ?
- // TODO: It would handle the index checking, and length returning, and
- // TODO: could also have an optimization check method.
- static class CsvEscaper extends CharSequenceTranslator {
-
- private static final char CSV_DELIMITER = ',';
- private static final char CSV_QUOTE = '"';
- private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
- private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
-
- // TODO: Replace with a RegexTranslator. That should consume the number of characters the regex uses up?
- @Override
- public int translate(CharSequence input, int index, Writer out) throws IOException {
-
- if(index != 0) {
- throw new IllegalStateException("CsvEscaper should never reach the [1] index");
- }
-
- if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
- out.write(input.toString());
- } else {
- out.write(CSV_QUOTE);
- out.write(StringUtils.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
- out.write(CSV_QUOTE);
- }
- return input.length();
- }
- }
-
-}
diff --git a/src/java/org/apache/commons/lang/text/translate/UnescapeUtils.java b/src/java/org/apache/commons/lang/text/translate/UnescapeUtils.java
deleted file mode 100644
index 24faba562..000000000
--- a/src/java/org/apache/commons/lang/text/translate/UnescapeUtils.java
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.lang.text.translate;
-
-import java.io.IOException;
-import java.io.Writer;
-
-// CsvUnescaper
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang.CharUtils;
-
-/**
- * Helper class defining various standard language unescape functions.
- *
- * @author Apache Software Foundation
- * @since 3.0
- */
-public class UnescapeUtils {
-
- // throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
- public static final CharSequenceTranslator UNESCAPE_JAVA =
- new AggregateTranslator(
- new UnicodeUnescaper(),
- new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
- new LookupTranslator(
- new String[][] {
- {"\\\\", "\\"},
- {"\\\"", "\""},
- {"\\'", "'"},
- {"\\", ""}
- })
- );
-
- public static final String unescapeJava(String input) {
- return UNESCAPE_JAVA.translate(input);
- }
-
- public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
-
- public static final String unescapeEcmaScript(String input) {
- return UNESCAPE_ECMASCRIPT.translate(input);
- }
-
- public static final CharSequenceTranslator UNESCAPE_HTML3 =
- new AggregateTranslator(
- new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
- new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
- new NumericEntityUnescaper()
- );
-
- public static final String unescapeHtml3(String input) {
- return UNESCAPE_HTML3.translate(input);
- }
-
- public static final CharSequenceTranslator UNESCAPE_HTML4 =
- new AggregateTranslator(
- new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
- new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
- new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
- new NumericEntityUnescaper()
- );
-
- public static final String unescapeHtml4(String input) {
- return UNESCAPE_HTML4.translate(input);
- }
-
- public static final CharSequenceTranslator UNESCAPE_XML =
- new AggregateTranslator(
- new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
- new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
- new NumericEntityUnescaper()
- );
-
- public static final String unescapeXml(String input) {
- return UNESCAPE_XML.translate(input);
- }
-
- public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
-
- public static final String unescapeCsv(String input) {
- return UNESCAPE_CSV.translate(input);
- }
-
- static class CsvUnescaper extends CharSequenceTranslator {
-
- private static final char CSV_DELIMITER = ',';
- private static final char CSV_QUOTE = '"';
- private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
- private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
-
- // TODO: Replace with a RegexTranslator. That should consume the number of characters the regex uses up?
- @Override
- public int translate(CharSequence input, int index, Writer out) throws IOException {
-
- if(index != 0) {
- throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
- }
-
- if ( input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE ) {
- out.write(input.toString());
- return input.length();
- }
-
- // strip quotes
- String quoteless = input.subSequence(1, input.length() - 1).toString();
-
- if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
- // deal with escaped quotes; ie) ""
- out.write(StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
- } else {
- out.write(input.toString());
- }
- return input.length();
- }
- }
-
-}
diff --git a/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java b/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java
index 6a3111641..fd56b466d 100644
--- a/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java
+++ b/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java
@@ -63,14 +63,14 @@ public void testConstructor() {
public void testEscapeJava() throws IOException {
assertEquals(null, StringEscapeUtils.escapeJava(null));
try {
- StringEscapeUtils.escapeJava(null, null);
+ StringEscapeUtils.ESCAPE_JAVA.translate(null, null);
fail();
} catch (IOException ex) {
fail();
} catch (IllegalArgumentException ex) {
}
try {
- StringEscapeUtils.escapeJava(null, "");
+ StringEscapeUtils.ESCAPE_JAVA.translate("", null);
fail();
} catch (IOException ex) {
fail();
@@ -123,21 +123,21 @@ private void assertEscapeJava(String message, String expected, String original)
assertEquals(message, expected, converted);
StringWriter writer = new StringWriter();
- StringEscapeUtils.escapeJava(writer, original);
+ StringEscapeUtils.ESCAPE_JAVA.translate(original, writer);
assertEquals(expected, writer.toString());
}
public void testUnescapeJava() throws IOException {
assertEquals(null, StringEscapeUtils.unescapeJava(null));
try {
- StringEscapeUtils.unescapeJava(null, null);
+ StringEscapeUtils.UNESCAPE_JAVA.translate(null, null);
fail();
} catch (IOException ex) {
fail();
} catch (IllegalArgumentException ex) {
}
try {
- StringEscapeUtils.unescapeJava(null, "");
+ StringEscapeUtils.UNESCAPE_JAVA.translate("", null);
fail();
} catch (IOException ex) {
fail();
@@ -177,31 +177,31 @@ private void assertUnescapeJava(String message, String unescaped, String origina
expected, actual);
StringWriter writer = new StringWriter();
- StringEscapeUtils.unescapeJava(writer, original);
+ StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer);
assertEquals(unescaped, writer.toString());
}
- public void testEscapeJavaScript() {
- assertEquals(null, StringEscapeUtils.escapeJavaScript(null));
+ public void testEscapeEcmaScript() {
+ assertEquals(null, StringEscapeUtils.escapeEcmaScript(null));
try {
- StringEscapeUtils.escapeJavaScript(null, null);
+ StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null);
fail();
} catch (IOException ex) {
fail();
} catch (IllegalArgumentException ex) {
}
try {
- StringEscapeUtils.escapeJavaScript(null, "");
+ StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null);
fail();
} catch (IOException ex) {
fail();
} catch (IllegalArgumentException ex) {
}
- assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeJavaScript("He didn't say, \"stop!\""));
+ assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\""));
assertEquals("document.getElementById(\\\"test\\\").value = \\'';"));
+ StringEscapeUtils.escapeEcmaScript("document.getElementById(\"test\").value = '';"));
}
@@ -227,10 +227,10 @@ public void testEscapeHtml() {
String message = htmlEscapes[i][0];
String expected = htmlEscapes[i][1];
String original = htmlEscapes[i][2];
- assertEquals(message, expected, StringEscapeUtils.escapeHtml(original));
+ assertEquals(message, expected, StringEscapeUtils.escapeHtml4(original));
StringWriter sw = new StringWriter();
try {
- StringEscapeUtils.escapeHtml(sw, original);
+ StringEscapeUtils.ESCAPE_HTML4.translate(original, sw);
} catch (IOException e) {
}
String actual = original == null ? null : sw.toString();
@@ -238,16 +238,16 @@ public void testEscapeHtml() {
}
}
- public void testUnescapeHtml() {
+ public void testUnescapeHtml4() {
for (int i = 0; i < htmlEscapes.length; ++i) {
String message = htmlEscapes[i][0];
String expected = htmlEscapes[i][2];
String original = htmlEscapes[i][1];
- assertEquals(message, expected, StringEscapeUtils.unescapeHtml(original));
+ assertEquals(message, expected, StringEscapeUtils.unescapeHtml4(original));
StringWriter sw = new StringWriter();
try {
- StringEscapeUtils.unescapeHtml(sw, original);
+ StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw);
} catch (IOException e) {
}
String actual = original == null ? null : sw.toString();
@@ -256,18 +256,18 @@ public void testUnescapeHtml() {
// \u00E7 is a cedilla (c with wiggle under)
// note that the test string must be 7-bit-clean (unicode escaped) or else it will compile incorrectly
// on some locales
- assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml("Fran\u00E7ais"));
+ assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais"));
- assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml("Hello&;World"));
- assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml("Hello&#;World"));
- assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml("Hello&# ;World"));
- assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml("Hello&##;World"));
+ assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World"));
+ assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World"));
+ assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World"));
+ assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World"));
}
public void testUnescapeHexCharsHtml() {
// Simple easy to grok test
- assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml("&#x80;&#x9F;"));
- assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml("&#X80;&#X9F;"));
+ assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#x80;&#x9F;"));
+ assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("&#X80;&#X9F;"));
// Test all Character values:
for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) {
Character c1 = new Character(i);
@@ -275,19 +275,19 @@ public void testUnescapeHexCharsHtml() {
String expected = c1.toString() + c2.toString();
String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";";
String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";";
- assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml(escapedC1 + escapedC2));
+ assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2));
}
}
public void testUnescapeUnknownEntity() throws Exception
{
- assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml("&zzzz;"));
+ assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;"));
}
public void testEscapeHtmlVersions() throws Exception
{
- assertEquals("&Beta;", StringEscapeUtils.escapeHtml("\u0392"));
- assertEquals("\u0392", StringEscapeUtils.unescapeHtml("&Beta;"));
+ assertEquals("&Beta;", StringEscapeUtils.escapeHtml4("\u0392"));
+ assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("&Beta;"));
//todo: refine API for escaping/unescaping specific HTML versions
@@ -310,14 +310,14 @@ public void testEscapeXml() throws Exception {
StringWriter sw = new StringWriter();
try {
- StringEscapeUtils.escapeXml(sw, "", StringEscapeUtils.unescapeHtml("<P&O>")); - assertEquals("test & <", StringEscapeUtils.unescapeHtml("test & <")); + assertEquals("
", StringEscapeUtils.unescapeHtml4("<P&O>")); + assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & <")); assertEquals("
", StringEscapeUtils.unescapeXml("<P&O>")); assertEquals("test & <", StringEscapeUtils.unescapeXml("test & <")); } public void testLang313() { - assertEquals("& &", StringEscapeUtils.unescapeHtml("& &")); + assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &")); } public void testEscapeCsvString() throws Exception @@ -361,7 +361,7 @@ public void testEscapeCsvWriter() throws Exception private void checkCsvEscapeWriter(String expected, String value) { try { StringWriter writer = new StringWriter(); - StringEscapeUtils.escapeCsv(writer, value); + StringEscapeUtils.ESCAPE_CSV.translate(value, writer); assertEquals(expected, writer.toString()); } catch (IOException e) { fail("Threw: " + e); @@ -397,7 +397,7 @@ public void testUnescapeCsvWriter() throws Exception private void checkCsvUnescapeWriter(String expected, String value) { try { StringWriter writer = new StringWriter(); - StringEscapeUtils.unescapeCsv(writer, value); + StringEscapeUtils.UNESCAPE_CSV.translate(value, writer); assertEquals(expected, writer.toString()); } catch (IOException e) { fail("Threw: " + e); @@ -414,14 +414,14 @@ public void testEscapeHtmlHighUnicode() throws java.io.UnsupportedEncodingExcept String original = new String(data, "UTF8"); - String escaped = StringEscapeUtils.escapeHtml( original ); + String escaped = StringEscapeUtils.escapeHtml4( original ); assertEquals( "High unicode should not have been escaped", original, escaped); - String unescaped = StringEscapeUtils.unescapeHtml( escaped ); + String unescaped = StringEscapeUtils.unescapeHtml4( escaped ); assertEquals( "High unicode should have been unchanged", original, unescaped); // TODO: I think this should hold, needs further investigation -// String unescapedFromEntity = StringEscapeUtils.unescapeHtml( "𝍢" ); +// String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "𝍢" ); // assertEquals( "High unicode should have been unescaped", original, unescapedFromEntity); } @@ -429,11 +429,11 @@ public void 
testEscapeHtmlHighUnicode() throws java.io.UnsupportedEncodingExcept public void testEscapeHiragana() throws java.io.UnsupportedEncodingException { // Some random Japanese unicode characters String original = "\u304B\u304C\u3068"; - String escaped = StringEscapeUtils.escapeHtml(original); - assertEquals( "Hiragana character unicode behaviour should not be being escaped by escapeHtml", + String escaped = StringEscapeUtils.escapeHtml4(original); + assertEquals( "Hiragana character unicode behaviour should not be being escaped by escapeHtml4", original, escaped); - String unescaped = StringEscapeUtils.unescapeHtml( escaped ); + String unescaped = StringEscapeUtils.unescapeHtml4( escaped ); assertEquals( "Hiragana character unicode behaviour has changed - expected no unescaping", escaped, unescaped); } diff --git a/src/test/org/apache/commons/lang/text/translate/EscapeUtilsTest.java b/src/test/org/apache/commons/lang/text/translate/EscapeUtilsTest.java deleted file mode 100644 index 27d03ccfa..000000000 --- a/src/test/org/apache/commons/lang/text/translate/EscapeUtilsTest.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.lang.text.translate; - -import junit.framework.TestCase; - -/** - * Unit tests for {@link org.apache.commons.lang.text.translate.EscapeUtils}. - */ -public class EscapeUtilsTest extends TestCase { - - public void testConstructorExists() { - new EscapeUtils(); - } -} diff --git a/src/test/org/apache/commons/lang/text/translate/UnescapeUtilsTest.java b/src/test/org/apache/commons/lang/text/translate/UnescapeUtilsTest.java deleted file mode 100644 index fc090fe27..000000000 --- a/src/test/org/apache/commons/lang/text/translate/UnescapeUtilsTest.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.lang.text.translate; - -import junit.framework.TestCase; - -/** - * Unit tests for {@link org.apache.commons.lang.text.translate.UnescapeUtils}. - */ -public class UnescapeUtilsTest extends TestCase { - - public void testConstructorExists() { - new UnescapeUtils(); - } -}