[CSV-168] CSVFormat.nullString should not be escaped. [CSV-170]

CSVFormat.MYSQL nullString should be "\N".

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1725407 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2016-01-19 02:34:19 +00:00
parent 913e4a8b97
commit f8b80e8341
4 changed files with 258 additions and 61 deletions

View File

@ -42,6 +42,8 @@
<action issue="CSV-153" type="update" dev="britter" due-to="Wren">CSVPrinter doesn't skip creation of header record if skipHeaderRecord is set to true</action> <action issue="CSV-153" type="update" dev="britter" due-to="Wren">CSVPrinter doesn't skip creation of header record if skipHeaderRecord is set to true</action>
<action issue="CSV-159" type="add" dev="ggregory" due-to="Yamil Medina">Add IgnoreCase option for accessing header names</action> <action issue="CSV-159" type="add" dev="ggregory" due-to="Yamil Medina">Add IgnoreCase option for accessing header names</action>
<action issue="CSV-169" type="add" dev="ggregory" due-to="Gary Gregory">The null string should be case-sensitive when reading records</action> <action issue="CSV-169" type="add" dev="ggregory" due-to="Gary Gregory">The null string should be case-sensitive when reading records</action>
<action issue="CSV-168" type="fix" dev="ggregory" due-to="Gary Gregory, cornel creanga">CSVFormat.nullString should not be escaped</action>
<action issue="CSV-170" type="fix" dev="ggregory" due-to="Gary Gregory, cornel creanga">CSVFormat.MYSQL nullString should be "\N"</action>
</release> </release>
<release version="1.2" date="2015-08-24" description="Feature and bug fix release"> <release version="1.2" date="2015-08-24" description="Feature and bug fix release">
<action issue="CSV-145" type="fix" dev="ggregory" due-to="Frank Ulbricht">CSVFormat.with* methods clear the header comments</action> <action issue="CSV-145" type="fix" dev="ggregory" due-to="Frank Ulbricht">CSVFormat.with* methods clear the header comments</action>

View File

@ -296,7 +296,7 @@ public final class CSVFormat implements Serializable {
* *
* <p> * <p>
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
* characters are escaped with '\'. * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
* </p> * </p>
* *
* <p> * <p>
@ -308,6 +308,7 @@ public final class CSVFormat implements Serializable {
* <li>withRecordSeparator('\n')</li> * <li>withRecordSeparator('\n')</li>
* <li>withIgnoreEmptyLines(false)</li> * <li>withIgnoreEmptyLines(false)</li>
* <li>withEscape('\\')</li> * <li>withEscape('\\')</li>
* <li>withNullString("\\N")</li>
* </ul> * </ul>
* *
* @see Predefined#MySQL * @see Predefined#MySQL
@ -315,7 +316,7 @@ public final class CSVFormat implements Serializable {
* http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a> * http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
*/ */
public static final CSVFormat MYSQL = DEFAULT.withDelimiter(TAB).withEscape(BACKSLASH).withIgnoreEmptyLines(false) public static final CSVFormat MYSQL = DEFAULT.withDelimiter(TAB).withEscape(BACKSLASH).withIgnoreEmptyLines(false)
.withQuote(null).withRecordSeparator(LF); .withQuote(null).withRecordSeparator(LF).withNullString("\\N");
/** /**
* Returns true if the given character is a line break character. * Returns true if the given character is a line break character.

View File

@ -136,7 +136,9 @@ public final class CSVPrinter implements Flushable, Closeable {
if (!newRecord) { if (!newRecord) {
out.append(format.getDelimiter()); out.append(format.getDelimiter());
} }
if (format.isQuoteCharacterSet()) { if (object == null) {
out.append(value);
} else if (format.isQuoteCharacterSet()) {
// the original object is needed so can check for Number // the original object is needed so can check for Number
printAndQuote(object, value, offset, len); printAndQuote(object, value, offset, len);
} else if (format.isEscapeCharacterSet()) { } else if (format.isEscapeCharacterSet()) {

View File

@ -18,6 +18,7 @@
package org.apache.commons.csv; package org.apache.commons.csv;
import static org.apache.commons.csv.Constants.CR; import static org.apache.commons.csv.Constants.CR;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
@ -35,6 +36,7 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
import org.apache.commons.lang3.ObjectUtils;
import org.junit.Test; import org.junit.Test;
/** /**
@ -44,6 +46,8 @@ import org.junit.Test;
*/ */
public class CSVPrinterTest { public class CSVPrinterTest {
private static final int ITERATIONS_FOR_RANDOM_TEST = 50000;
private final String recordSeparator = CSVFormat.DEFAULT.getRecordSeparator(); private final String recordSeparator = CSVFormat.DEFAULT.getRecordSeparator();
private static String printable(final String s) { private static String printable(final String s) {
@ -65,21 +69,14 @@ public class CSVPrinterTest {
final int nLines = r.nextInt(4) + 1; final int nLines = r.nextInt(4) + 1;
final int nCol = r.nextInt(3) + 1; final int nCol = r.nextInt(3) + 1;
// nLines=1;nCol=2; // nLines=1;nCol=2;
final String[][] lines = new String[nLines][]; final String[][] lines = generateLines(nLines, nCol);
for (int i = 0; i < nLines; i++) {
final String[] line = new String[nCol];
lines[i] = line;
for (int j = 0; j < nCol; j++) {
line[j] = randStr();
}
}
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();
final CSVPrinter printer = new CSVPrinter(sw, format); final CSVPrinter printer = new CSVPrinter(sw, format);
for (int i = 0; i < nLines; i++) { for (int i = 0; i < nLines; i++) {
// for (int j=0; j<lines[i].length; j++) System.out.println("### VALUE=:" + printable(lines[i][j])); // for (int j=0; j<lines[i].length; j++) System.out.println("### VALUE=:" + printable(lines[i][j]));
printer.printRecord((Object[])lines[i]); printer.printRecord((Object[]) lines[i]);
} }
printer.flush(); printer.flush();
@ -90,10 +87,26 @@ public class CSVPrinterTest {
final CSVParser parser = CSVParser.parse(result, format); final CSVParser parser = CSVParser.parse(result, format);
final List<CSVRecord> parseResult = parser.getRecords(); final List<CSVRecord> parseResult = parser.getRecords();
Utils.compare("Printer output :" + printable(result), lines, parseResult); String[][] expected = lines.clone();
for (int i = 0; i < expected.length; i++) {
expected[i] = expectNulls(expected[i], format);
}
Utils.compare("Printer output :" + printable(result), expected, parseResult);
parser.close(); parser.close();
} }
/**
 * Builds a table of random strings for the random round-trip tests.
 *
 * @param nLines
 *            number of rows to create
 * @param nCol
 *            number of cells in each row
 * @return an {@code nLines} x {@code nCol} array in which every cell was produced by {@code randStr()}
 */
private String[][] generateLines(final int nLines, final int nCol) {
    final String[][] rows = new String[nLines][];
    for (int row = 0; row < rows.length; row++) {
        final String[] cells = new String[nCol];
        for (int col = 0; col < cells.length; col++) {
            cells[col] = randStr();
        }
        rows[row] = cells;
    }
    return rows;
}
private void doRandom(final CSVFormat format, final int iter) throws Exception { private void doRandom(final CSVFormat format, final int iter) throws Exception {
for (int i = 0; i < iter; i++) { for (int i = 0; i < iter; i++) {
doOneRandom(format); doOneRandom(format);
@ -189,8 +202,8 @@ public class CSVPrinterTest {
public void testExcelPrintAllIterableOfLists() throws IOException { public void testExcelPrintAllIterableOfLists() throws IOException {
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();
final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL); final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.EXCEL);
printer.printRecords(Arrays.asList(new List[] { Arrays.asList("r1c1", "r1c2"), printer.printRecords(
Arrays.asList("r2c1", "r2c2") })); Arrays.asList(new List[] { Arrays.asList("r1c1", "r1c2"), Arrays.asList("r2c1", "r2c2") }));
assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString()); assertEquals("r1c1,r1c2" + recordSeparator + "r2c1,r2c2" + recordSeparator, sw.toString());
printer.close(); printer.close();
} }
@ -301,6 +314,131 @@ public class CSVPrinterTest {
printer.close(); printer.close();
} }
/**
 * Checks that the predefined MYSQL format reports a backslash followed by {@code N} as its null string.
 */
@Test
public void testMySqlNullStringDefault() throws IOException {
    final String mySqlNullString = CSVFormat.MYSQL.getNullString();
    assertEquals("\\N", mySqlNullString);
}
/**
 * Prints records through MYSQL-based formats and checks both the exact printed text and the values obtained by
 * parsing that text back with the same format.
 */
@Test
public void testMySqlNullOutput() throws IOException {
    // Quoting enabled with "NULL" as the null string: the string value is printed quoted, the null value is
    // printed as the bare null string, and both fields are expected to be read back as null.
    final CSVFormat quotedNullFormat = CSVFormat.MYSQL.withQuote('"').withNullString("NULL")
            .withQuoteMode(QuoteMode.NON_NUMERIC);
    assertMySqlNullOutput(quotedNullFormat, new String[] { "NULL", null }, "\"NULL\"\tNULL\n", new Object[2]);

    // Null string set explicitly to backslash-N: a string value equal to the null string is printed with its
    // backslash escaped, while a null value is printed as the null string itself, unescaped.
    final CSVFormat backslashNFormat = CSVFormat.MYSQL.withNullString("\\N");
    assertMySqlNullOutput(backslashNFormat, new String[] { "\\N", null }, "\\\\N\t\\N\n");
    assertMySqlNullOutput(backslashNFormat, new String[] { "\\N", "A" }, "\\\\N\tA\n");
    // A line feed value is printed as backslash followed by 'n'.
    assertMySqlNullOutput(backslashNFormat, new String[] { "\n", "A" }, "\\n\tA\n");

    // Null string "NULL" without quoting: an empty string is printed as an empty field.
    assertMySqlNullOutput(CSVFormat.MYSQL.withNullString("NULL"), new String[] { "", null }, "\tNULL\n");

    // Plain MYSQL format, relying on its default null string.
    assertMySqlNullOutput(CSVFormat.MYSQL, new String[] { "", null }, "\t\\N\n");
    assertMySqlNullOutput(CSVFormat.MYSQL, new String[] { "\\N", "", "\u000e,\\\r" },
            "\\\\N\t\t\u000e,\\\\\\r\n");
    assertMySqlNullOutput(CSVFormat.MYSQL, new String[] { "NULL", "\\\r" }, "NULL\t\\\\\\r\n");
    assertMySqlNullOutput(CSVFormat.MYSQL, new String[] { "\\\r" }, "\\\\\\r\n");
}

/**
 * Runs one print/parse scenario where the parsed values are expected to equal the input values after
 * {@code expectNulls} has been applied to them.
 */
private void assertMySqlNullOutput(final CSVFormat format, final Object[] values, final String expectedOutput)
        throws IOException {
    assertMySqlNullOutput(format, values, expectedOutput, expectNulls(values, format));
}

/**
 * Prints {@code values} as a single record with {@code format}, asserts that the printed text equals
 * {@code expectedOutput}, then parses {@code expectedOutput} with the same format and asserts that the first
 * record's values equal {@code expectedParsed}.
 */
private void assertMySqlNullOutput(final CSVFormat format, final Object[] values, final String expectedOutput,
        final Object[] expectedParsed) throws IOException {
    final StringWriter writer = new StringWriter();
    final CSVPrinter printer = new CSVPrinter(writer, format);
    try {
        printer.printRecord(values);
    } finally {
        // Close even when printing fails so a failing scenario does not leave the printer open.
        printer.close();
    }
    assertEquals(expectedOutput, writer.toString());
    assertArrayEquals(expectedParsed, toFirstRecordValues(expectedOutput, format));
}
/**
 * Produces the values a test should expect after a printed record has been parsed again: every element equal to
 * the format's null string is replaced by {@code null}, because the parser reads such strings back as null.
 *
 * @param original
 *            the values handed to the printer; this array is not modified
 * @param csvFormat
 *            the format whose null string is compared against each element
 * @return a copy of {@code original} with null-string elements replaced by {@code null}
 */
private <T> T[] expectNulls(T[] original, CSVFormat csvFormat) {
    final T[] expected = original.clone();
    int index = 0;
    for (final T value : expected) {
        if (ObjectUtils.equals(csvFormat.getNullString(), value)) {
            expected[index] = null;
        }
        index++;
    }
    return expected;
}
/**
 * Parses the given CSV text and returns the values of its first record.
 *
 * @param expected
 *            the CSV text to parse
 * @param format
 *            the format used for parsing
 * @return the values of the first parsed record
 * @throws IOException
 *             if parsing or closing the parser fails
 */
private String[] toFirstRecordValues(final String expected, CSVFormat format) throws IOException {
    final CSVParser parser = CSVParser.parse(expected, format);
    try {
        return parser.getRecords().get(0).values();
    } finally {
        // Release the parser; the random round-trip test in this class closes its parser as well.
        parser.close();
    }
}
@Test @Test
public void testPrinter1() throws IOException { public void testPrinter1() throws IOException {
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();
@ -429,11 +567,28 @@ public class CSVPrinterTest {
} }
@Test @Test
public void testRandom() throws Exception { public void testRandomDefault() throws Exception {
final int iter = 10000; doRandom(CSVFormat.DEFAULT, ITERATIONS_FOR_RANDOM_TEST);
doRandom(CSVFormat.DEFAULT, iter); }
doRandom(CSVFormat.EXCEL, iter);
doRandom(CSVFormat.MYSQL, iter); @Test
public void testRandomExcel() throws Exception {
doRandom(CSVFormat.EXCEL, ITERATIONS_FOR_RANDOM_TEST);
}
@Test
public void testRandomMySql() throws Exception {
doRandom(CSVFormat.MYSQL, ITERATIONS_FOR_RANDOM_TEST);
}
@Test
public void testRandomTdf() throws Exception {
doRandom(CSVFormat.TDF, ITERATIONS_FOR_RANDOM_TEST);
}
@Test
public void testRandomRfc4180() throws Exception {
doRandom(CSVFormat.RFC4180, ITERATIONS_FOR_RANDOM_TEST);
} }
@Test @Test
@ -496,6 +651,42 @@ public class CSVPrinterTest {
printer.close(); printer.close();
} }
/**
 * Prints values containing backslashes with the default format and a single-quote quote character, and checks
 * that each value comes out wrapped in single quotes with its backslashes unchanged.
 */
@Test
public void testEscapeBackslash() throws IOException {
    final char quoteChar = '\'';
    final String eol = "\r\n";
    final CSVFormat singleQuoteFormat = CSVFormat.DEFAULT.withQuote(quoteChar);

    assertEquals("'\\'", printSingleValue(singleQuoteFormat, "\\"));
    assertEquals("'\\\r'", printSingleValue(singleQuoteFormat, "\\\r"));
    assertEquals("'X\\\r'", printSingleValue(singleQuoteFormat, "X\\\r"));

    // Same value printed as a whole record, which appends the record separator.
    final StringWriter recordWriter = new StringWriter();
    final CSVPrinter recordPrinter = new CSVPrinter(recordWriter, CSVFormat.DEFAULT.withQuote(quoteChar));
    recordPrinter.printRecord(new Object[] { "\\\r" });
    recordPrinter.close();
    assertEquals("'\\\r'" + eol, recordWriter.toString());

    assertEquals("'\\\\'", printSingleValue(singleQuoteFormat, "\\\\"));
}

/**
 * Prints one value with a fresh printer for the given format, closes the printer and returns the text written.
 */
private String printSingleValue(final CSVFormat format, final Object value) throws IOException {
    final StringWriter out = new StringWriter();
    final CSVPrinter singlePrinter = new CSVPrinter(out, format);
    singlePrinter.print(value);
    singlePrinter.close();
    return out.toString();
}
@Test @Test
public void testPlainEscaped() throws IOException { public void testPlainEscaped() throws IOException {
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();
@ -549,8 +740,7 @@ public class CSVPrinterTest {
@Test @Test
public void testHeader() throws IOException { public void testHeader() throws IOException {
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();
final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null) final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3"));
.withHeader("C1", "C2", "C3"));
printer.printRecord("a", "b", "c"); printer.printRecord("a", "b", "c");
printer.printRecord("x", "y", "z"); printer.printRecord("x", "y", "z");
assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString()); assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString());
@ -571,8 +761,8 @@ public class CSVPrinterTest {
public void testSkipHeaderRecordTrue() throws IOException { public void testSkipHeaderRecordTrue() throws IOException {
// functionally identical to testHeaderNotSet, used to test CSV-153 // functionally identical to testHeaderNotSet, used to test CSV-153
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();
final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null) final CSVPrinter printer = new CSVPrinter(sw,
.withHeader("C1", "C2", "C3").withSkipHeaderRecord(true)); CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(true));
printer.printRecord("a", "b", "c"); printer.printRecord("a", "b", "c");
printer.printRecord("x", "y", "z"); printer.printRecord("x", "y", "z");
assertEquals("a,b,c\r\nx,y,z\r\n", sw.toString()); assertEquals("a,b,c\r\nx,y,z\r\n", sw.toString());
@ -583,8 +773,8 @@ public class CSVPrinterTest {
public void testSkipHeaderRecordFalse() throws IOException { public void testSkipHeaderRecordFalse() throws IOException {
// functionally identical to testHeader, used to test CSV-153 // functionally identical to testHeader, used to test CSV-153
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();
final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withQuote(null) final CSVPrinter printer = new CSVPrinter(sw,
.withHeader("C1", "C2", "C3").withSkipHeaderRecord(false)); CSVFormat.DEFAULT.withQuote(null).withHeader("C1", "C2", "C3").withSkipHeaderRecord(false));
printer.printRecord("a", "b", "c"); printer.printRecord("a", "b", "c");
printer.printRecord("x", "y", "z"); printer.printRecord("x", "y", "z");
assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString()); assertEquals("C1,C2,C3\r\na,b,c\r\nx,y,z\r\n", sw.toString());
@ -597,7 +787,8 @@ public class CSVPrinterTest {
final Date now = new Date(); final Date now = new Date();
final CSVFormat format = CSVFormat.EXCEL; final CSVFormat format = CSVFormat.EXCEL;
final CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format); final CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format);
assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1,Col2\r\nA,B\r\nC,D\r\n", sw.toString()); assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1,Col2\r\nA,B\r\nC,D\r\n",
sw.toString());
csvPrinter.close(); csvPrinter.close();
} }
@ -607,7 +798,8 @@ public class CSVPrinterTest {
final Date now = new Date(); final Date now = new Date();
final CSVFormat format = CSVFormat.TDF; final CSVFormat format = CSVFormat.TDF;
final CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format); final CSVPrinter csvPrinter = printWithHeaderComments(sw, now, format);
assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1\tCol2\r\nA\tB\r\nC\tD\r\n", sw.toString()); assertEquals("# Generated by Apache Commons CSV 1.1\r\n# " + now + "\r\nCol1\tCol2\r\nA\tB\r\nC\tD\r\n",
sw.toString());
csvPrinter.close(); csvPrinter.close();
} }