From ea4ce1628920e69f9474157e0a31e3250ee2ef48 Mon Sep 17 00:00:00 2001 From: Henri Yandell Date: Sat, 5 Jan 2008 06:33:12 +0000 Subject: [PATCH] Applying my patch from LANG-374 that adds an unescape method. It stays true to the spec and considers "foo.bar" to unescape as "foo.bar", ie) no unescaping, because "foo.bar" is illegal output from the escape method spec-wise. git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@609094 13f79535-47bb-0310-9956-ffa450edef68 --- .../commons/lang/StringEscapeUtils.java | 84 +++++++++++++++++++ .../commons/lang/StringEscapeUtilsTest.java | 37 +++++++- 2 files changed, 120 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/commons/lang/StringEscapeUtils.java b/src/java/org/apache/commons/lang/StringEscapeUtils.java index cc0f4e38c..03b458d79 100644 --- a/src/java/org/apache/commons/lang/StringEscapeUtils.java +++ b/src/java/org/apache/commons/lang/StringEscapeUtils.java @@ -772,4 +772,88 @@ public class StringEscapeUtils { out.write(CSV_QUOTE); } + /** + *

Returns a String value for an unescaped CSV column.

+ * + *

If the value is enclosed in double quotes, and contains a comma, newline + * or double quote, then the enclosing quotes are removed. + *

+ * + *

Any escaped double quote (a pair of double quotes) is unescaped + * to a single double quote.

+ * + *

If the value is not enclosed in double quotes, or is enclosed but does not contain a + * comma, newline or double quote, then the String value is returned unchanged.

+ *

+ * + * see Wikipedia and + * RFC 4180. + * + * @param str the input CSV column String, may be null + * @return the input String, with enclosing double quotes removed and embedded double + * quotes unescaped, null if null string input + * @since 2.4 + */ + public static String unescapeCsv(String str) { + if (str == null) { + return null; + } + try { + StringWriter writer = new StringWriter(); + unescapeCsv(writer, str); + return writer.toString(); + } catch (IOException ioe) { + // this should never ever happen while writing to a StringWriter + ioe.printStackTrace(); + return null; + } + } + + /** + *

Writes the unescaped value of a CSV column to the supplied Writer.

+ * + *

If the value is enclosed in double quotes, and contains a comma, newline + * or double quote, then the enclosing quotes are removed. + *

+ * + *

Any escaped double quote (a pair of double quotes) is unescaped + * to a single double quote.

+ * + *

If the value is not enclosed in double quotes, or is enclosed but does not contain a + * comma, newline or double quote, then the String value is written unchanged.

+ *

+ * + * see Wikipedia and + * RFC 4180. + * + * @param str the input CSV column String, may be null + * @param out Writer to write the input String to, with enclosing double quotes + * removed and embedded double quotes unescaped, null if null string input + * @throws IOException if error occurs on underlying Writer + * @since 2.4 + */ + public static void unescapeCsv(Writer out, String str) throws IOException { + if (str == null) { + return; + } + if (str.length() < 2) { + out.write(str); + return; + } + if ( str.charAt(0) != CSV_QUOTE || str.charAt(str.length() - 1) != CSV_QUOTE ) { + out.write(str); + return; + } + + // strip quotes + String quoteless = str.substring(1, str.length() - 1); + + if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { + // deal with escaped quotes; ie) "" + str = StringUtils.replace(quoteless, "" + CSV_QUOTE + CSV_QUOTE, Character.toString(CSV_QUOTE)); + } + + out.write(str); + } + } diff --git a/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java b/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java index a116163d5..b0b38f975 100644 --- a/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java +++ b/src/test/org/apache/commons/lang/StringEscapeUtilsTest.java @@ -332,7 +332,6 @@ public class StringEscapeUtilsTest extends TestCase { assertEquals("& &", StringEscapeUtils.unescapeHtml("& &")); } - public void testEscapeCsvString() throws Exception { assertEquals("foo.bar", StringEscapeUtils.escapeCsv("foo.bar")); @@ -364,4 +363,40 @@ public class StringEscapeUtilsTest extends TestCase { fail("Threw: " + e); } } + + public void testUnescapeCsvString() throws Exception + { + assertEquals("foo.bar", StringEscapeUtils.unescapeCsv("foo.bar")); + assertEquals("foo,bar", StringEscapeUtils.unescapeCsv("\"foo,bar\"")); + assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\"")); + assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\"")); + assertEquals("foo\"bar", 
StringEscapeUtils.unescapeCsv("\"foo\"\"bar\"")); + assertEquals("", StringEscapeUtils.unescapeCsv("")); + assertEquals(null, StringEscapeUtils.unescapeCsv(null)); + + assertEquals("\"foo.bar\"", StringEscapeUtils.unescapeCsv("\"foo.bar\"")); + } + + public void testUnescapeCsvWriter() throws Exception + { + checkCsvUnescapeWriter("foo.bar", "foo.bar"); + checkCsvUnescapeWriter("foo,bar", "\"foo,bar\""); + checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\""); + checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\""); + checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\""); + checkCsvUnescapeWriter("", null); + checkCsvUnescapeWriter("", ""); + + checkCsvUnescapeWriter("\"foo.bar\"", "\"foo.bar\""); + } + + private void checkCsvUnescapeWriter(String expected, String value) { + try { + StringWriter writer = new StringWriter(); + StringEscapeUtils.unescapeCsv(writer, value); + assertEquals(expected, writer.toString()); + } catch (IOException e) { + fail("Threw: " + e); + } + } }