Use try-with-resources.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1748094 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2016-06-13 06:56:49 +00:00
parent ed6adc706e
commit 9daee9042c
12 changed files with 1131 additions and 1144 deletions

View File

@ -609,8 +609,8 @@ public final class CSVFormat implements Serializable {
*/ */
public String format(final Object... values) { public String format(final Object... values) {
final StringWriter out = new StringWriter(); final StringWriter out = new StringWriter();
try { try (final CSVPrinter csvPrinter = new CSVPrinter(out, this)) {
new CSVPrinter(out, this).printRecord(values); csvPrinter.printRecord(values);
return out.toString().trim(); return out.toString().trim();
} catch (final IOException e) { } catch (final IOException e) {
// should not happen because a StringWriter does not do IO. // should not happen because a StringWriter does not do IO.

View File

@ -116,19 +116,19 @@ public class CSVFileParserTest {
// Now parse the file and compare against the expected results // Now parse the file and compare against the expected results
// We use a buffered reader internally so no need to create one here. // We use a buffered reader internally so no need to create one here.
final CSVParser parser = CSVParser.parse(new File(BASE, split[0]), Charset.defaultCharset(), format); try (final CSVParser parser = CSVParser.parse(new File(BASE, split[0]), Charset.defaultCharset(), format)) {
for (final CSVRecord record : parser) { for (final CSVRecord record : parser) {
String parsed = Arrays.toString(record.values()); String parsed = Arrays.toString(record.values());
if (checkComments) { if (checkComments) {
final String comment = record.getComment().replace("\n", "\\n"); final String comment = record.getComment().replace("\n", "\\n");
if (comment != null) { if (comment != null) {
parsed += "#" + comment; parsed += "#" + comment;
}
} }
final int count = record.size();
assertEquals(testName, readTestData(), count + ":" + parsed);
} }
final int count = record.size();
assertEquals(testName, readTestData(), count + ":" + parsed);
} }
parser.close();
} }
@Test @Test
@ -160,18 +160,18 @@ public class CSVFileParserTest {
// Now parse the file and compare against the expected results // Now parse the file and compare against the expected results
final URL resource = ClassLoader.getSystemResource("CSVFileParser/" + split[0]); final URL resource = ClassLoader.getSystemResource("CSVFileParser/" + split[0]);
final CSVParser parser = CSVParser.parse(resource, Charset.forName("UTF-8"), format); try (final CSVParser parser = CSVParser.parse(resource, Charset.forName("UTF-8"), format)) {
for (final CSVRecord record : parser) { for (final CSVRecord record : parser) {
String parsed = Arrays.toString(record.values()); String parsed = Arrays.toString(record.values());
if (checkComments) { if (checkComments) {
final String comment = record.getComment().replace("\n", "\\n"); final String comment = record.getComment().replace("\n", "\\n");
if (comment != null) { if (comment != null) {
parsed += "#" + comment; parsed += "#" + comment;
}
} }
final int count = record.size();
assertEquals(testName, readTestData(), count + ":" + parsed);
} }
final int count = record.size();
assertEquals(testName, readTestData(), count + ":" + parsed);
} }
parser.close();
} }
} }

View File

@ -327,10 +327,10 @@ public class CSVFormatTest {
public void testSerialization() throws Exception { public void testSerialization() throws Exception {
final ByteArrayOutputStream out = new ByteArrayOutputStream(); final ByteArrayOutputStream out = new ByteArrayOutputStream();
final ObjectOutputStream oos = new ObjectOutputStream(out); try (final ObjectOutputStream oos = new ObjectOutputStream(out)) {
oos.writeObject(CSVFormat.DEFAULT); oos.writeObject(CSVFormat.DEFAULT);
oos.flush(); oos.flush();
oos.close(); }
final ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(out.toByteArray())); final ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(out.toByteArray()));
final CSVFormat format = (CSVFormat) in.readObject(); final CSVFormat format = (CSVFormat) in.readObject();

View File

@ -61,7 +61,7 @@ import org.junit.Test;
public class CSVParserTest { public class CSVParserTest {
private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n" private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n"
// + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n"; // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
+ " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping
private static final String CSV_INPUT_1 = "a,b,c,d"; private static final String CSV_INPUT_1 = "a,b,c,d";
@ -79,7 +79,7 @@ public class CSVParserTest {
// quote as the encapsulator. // quote as the encapsulator.
final String code = "one,two,three\n" // 0 final String code = "one,two,three\n" // 0
+ "'',''\n" // 1) empty encapsulators + "'',''\n" // 1) empty encapsulators
+ "/',/'\n" // 2) single encapsulators + "/',/'\n" // 2) single encapsulators
+ "'/'','/''\n" // 3) single encapsulators encapsulated via escape + "'/'','/''\n" // 3) single encapsulators encapsulated via escape
+ "'''',''''\n" // 4) single encapsulators encapsulated via doubling + "'''',''''\n" // 4) single encapsulators encapsulated via doubling
@ -102,12 +102,12 @@ public class CSVParserTest {
final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(CRLF).withEscape('/') final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(CRLF).withEscape('/')
.withIgnoreEmptyLines(); .withIgnoreEmptyLines();
final CSVParser parser = CSVParser.parse(code, format); try (final CSVParser parser = CSVParser.parse(code, format)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertTrue(records.size() > 0); assertTrue(records.size() > 0);
Utils.compare("Records do not match expected result", res, records); Utils.compare("Records do not match expected result", res, records);
parser.close(); }
} }
@Test @Test
@ -129,104 +129,98 @@ public class CSVParserTest {
final CSVFormat format = CSVFormat.newFormat(',').withRecordSeparator(CRLF).withEscape('/') final CSVFormat format = CSVFormat.newFormat(',').withRecordSeparator(CRLF).withEscape('/')
.withIgnoreEmptyLines(); .withIgnoreEmptyLines();
final CSVParser parser = CSVParser.parse(code, format); try (final CSVParser parser = CSVParser.parse(code, format)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertTrue(records.size() > 0); assertTrue(records.size() > 0);
Utils.compare("", res, records); Utils.compare("", res, records);
parser.close(); }
} }
@Test @Test
@Ignore @Ignore
public void testBackslashEscapingOld() throws IOException { public void testBackslashEscapingOld() throws IOException {
final String code = "one,two,three\n" + "on\\\"e,two\n" + "on\"e,two\n" + "one,\"tw\\\"o\"\n" final String code = "one,two,three\n" + "on\\\"e,two\n" + "on\"e,two\n" + "one,\"tw\\\"o\"\n" +
+ "one,\"t\\,wo\"\n" + "one,two,\"th,ree\"\n" + "\"a\\\\\"\n" + "a\\,b\n" + "\"a\\\\,b\""; "one,\"t\\,wo\"\n" + "one,two,\"th,ree\"\n" + "\"a\\\\\"\n" + "a\\,b\n" + "\"a\\\\,b\"";
final String[][] res = { { "one", "two", "three" }, { "on\\\"e", "two" }, { "on\"e", "two" }, final String[][] res = { { "one", "two", "three" }, { "on\\\"e", "two" }, { "on\"e", "two" },
{ "one", "tw\"o" }, { "one", "t\\,wo" }, // backslash in quotes only escapes a delimiter (",") { "one", "tw\"o" }, { "one", "t\\,wo" }, // backslash in quotes only escapes a delimiter (",")
{ "one", "two", "th,ree" }, { "a\\\\" }, // backslash in quotes only escapes a delimiter (",") { "one", "two", "th,ree" }, { "a\\\\" }, // backslash in quotes only escapes a delimiter (",")
{ "a\\", "b" }, // a backslash must be returned { "a\\", "b" }, // a backslash must be returned
{ "a\\\\,b" } // backslash in quotes only escapes a delimiter (",") { "a\\\\,b" } // backslash in quotes only escapes a delimiter (",")
}; };
final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(res.length, records.size()); assertEquals(res.length, records.size());
assertTrue(records.size() > 0); assertTrue(records.size() > 0);
for (int i = 0; i < res.length; i++) { for (int i = 0; i < res.length; i++) {
assertArrayEquals(res[i], records.get(i).values()); assertArrayEquals(res[i], records.get(i).values());
}
} }
parser.close();
} }
@Test @Test
@Ignore("CSV-107") @Ignore("CSV-107")
public void testBOM() throws IOException { public void testBOM() throws IOException {
final URL url = ClassLoader.getSystemClassLoader().getResource("CSVFileParser/bom.csv"); final URL url = ClassLoader.getSystemClassLoader().getResource("CSVFileParser/bom.csv");
final CSVParser parser = CSVParser.parse(url, Charset.forName("UTF-8"), CSVFormat.EXCEL.withHeader()); try (final CSVParser parser = CSVParser.parse(url, Charset.forName("UTF-8"), CSVFormat.EXCEL.withHeader())) {
try {
for (final CSVRecord record : parser) { for (final CSVRecord record : parser) {
final String string = record.get("Date"); final String string = record.get("Date");
Assert.assertNotNull(string); Assert.assertNotNull(string);
// System.out.println("date: " + record.get("Date")); // System.out.println("date: " + record.get("Date"));
} }
} finally {
parser.close();
} }
} }
@Test @Test
public void testBOMInputStream() throws IOException { public void testBOMInputStream() throws IOException {
final URL url = ClassLoader.getSystemClassLoader().getResource("CSVFileParser/bom.csv"); final URL url = ClassLoader.getSystemClassLoader().getResource("CSVFileParser/bom.csv");
final Reader reader = new InputStreamReader(new BOMInputStream(url.openStream()), "UTF-8"); try (final Reader reader = new InputStreamReader(new BOMInputStream(url.openStream()), "UTF-8");
final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader()); final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
try {
for (final CSVRecord record : parser) { for (final CSVRecord record : parser) {
final String string = record.get("Date"); final String string = record.get("Date");
Assert.assertNotNull(string); Assert.assertNotNull(string);
// System.out.println("date: " + record.get("Date")); // System.out.println("date: " + record.get("Date"));
} }
} finally {
parser.close();
reader.close();
} }
} }
@Test @Test
public void testCarriageReturnEndings() throws IOException { public void testCarriageReturnEndings() throws IOException {
final String code = "foo\rbaar,\rhello,world\r,kanu"; final String code = "foo\rbaar,\rhello,world\r,kanu";
final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(4, records.size()); assertEquals(4, records.size());
parser.close(); }
} }
@Test @Test
public void testCarriageReturnLineFeedEndings() throws IOException { public void testCarriageReturnLineFeedEndings() throws IOException {
final String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; final String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(4, records.size()); assertEquals(4, records.size());
parser.close(); }
} }
@Test(expected = NoSuchElementException.class) @Test(expected = NoSuchElementException.class)
public void testClose() throws Exception { public void testClose() throws Exception {
final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z"); final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");
final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in); final Iterator<CSVRecord> records;
final Iterator<CSVRecord> records = parser.iterator(); try (final CSVParser parser = CSVFormat.DEFAULT.withCommentMarker('#').withHeader().parse(in)) {
assertTrue(records.hasNext()); records = parser.iterator();
parser.close(); assertTrue(records.hasNext());
}
assertFalse(records.hasNext()); assertFalse(records.hasNext());
records.next(); records.next();
} }
@Test @Test
public void testCSV57() throws Exception { public void testCSV57() throws Exception {
final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) {
final List<CSVRecord> list = parser.getRecords(); final List<CSVRecord> list = parser.getRecords();
assertNotNull(list); assertNotNull(list);
assertEquals(0, list.size()); assertEquals(0, list.size());
parser.close(); }
} }
@Test @Test
@ -235,27 +229,26 @@ public class CSVParserTest {
+ "\"\n\",\" \",#\n" // 2) + "\"\n\",\" \",#\n" // 2)
+ "#,\"\"\n" // 3) + "#,\"\"\n" // 3)
+ "# Final comment\n"// 4) + "# Final comment\n"// 4)
; ;
final String[][] res = { { "a", "b#" }, { "\n", " ", "#" }, { "#", "" }, { "# Final comment" } }; final String[][] res = { { "a", "b#" }, { "\n", " ", "#" }, { "#", "" }, { "# Final comment" } };
CSVFormat format = CSVFormat.DEFAULT; CSVFormat format = CSVFormat.DEFAULT;
assertFalse(format.isCommentMarkerSet()); assertFalse(format.isCommentMarkerSet());
CSVParser parser = CSVParser.parse(code, format);
List<CSVRecord> records = parser.getRecords();
assertTrue(records.size() > 0);
Utils.compare("Failed to parse without comments", res, records);
final String[][] res_comments = { { "a", "b#" }, { "\n", " ", "#" }, }; final String[][] res_comments = { { "a", "b#" }, { "\n", " ", "#" }, };
format = CSVFormat.DEFAULT.withCommentMarker('#'); try (final CSVParser parser = CSVParser.parse(code, format)) {
parser.close(); List<CSVRecord> records = parser.getRecords();
parser = CSVParser.parse(code, format); assertTrue(records.size() > 0);
records = parser.getRecords();
Utils.compare("Failed to parse with comments", res_comments, records); Utils.compare("Failed to parse without comments", res, records);
parser.close();
format = CSVFormat.DEFAULT.withCommentMarker('#');
}
try (final CSVParser parser = CSVParser.parse(code, format)) {
List<CSVRecord> records = parser.getRecords();
Utils.compare("Failed to parse with comments", res_comments, records);
}
} }
@Test(expected = IllegalArgumentException.class) @Test(expected = IllegalArgumentException.class)
@ -265,9 +258,9 @@ public class CSVParserTest {
@Test @Test
public void testEmptyFile() throws Exception { public void testEmptyFile() throws Exception {
final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse("", CSVFormat.DEFAULT)) {
assertNull(parser.nextRecord()); assertNull(parser.nextRecord());
parser.close(); }
} }
@Test @Test
@ -276,14 +269,14 @@ public class CSVParserTest {
final String[][] res = { { "hello", "" } // CSV format ignores empty lines final String[][] res = { { "hello", "" } // CSV format ignores empty lines
}; };
for (final String code : codes) { for (final String code : codes) {
final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(res.length, records.size()); assertEquals(res.length, records.size());
assertTrue(records.size() > 0); assertTrue(records.size() > 0);
for (int i = 0; i < res.length; i++) { for (int i = 0; i < res.length; i++) {
assertArrayEquals(res[i], records.get(i).values()); assertArrayEquals(res[i], records.get(i).values());
}
} }
parser.close();
} }
} }
@ -293,14 +286,14 @@ public class CSVParserTest {
final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines
{ "" } }; { "" } };
for (final String code : codes) { for (final String code : codes) {
final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(res.length, records.size()); assertEquals(res.length, records.size());
assertTrue(records.size() > 0); assertTrue(records.size() > 0);
for (int i = 0; i < res.length; i++) { for (int i = 0; i < res.length; i++) {
assertArrayEquals(res[i], records.get(i).values()); assertArrayEquals(res[i], records.get(i).values());
}
} }
parser.close();
} }
} }
@ -312,14 +305,14 @@ public class CSVParserTest {
final String[][] res = { { "hello", "" }, // CSV format ignores empty lines final String[][] res = { { "hello", "" }, // CSV format ignores empty lines
{ "world", "" } }; { "world", "" } };
for (final String code : codes) { for (final String code : codes) {
final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(res.length, records.size()); assertEquals(res.length, records.size());
assertTrue(records.size() > 0); assertTrue(records.size() > 0);
for (int i = 0; i < res.length; i++) { for (int i = 0; i < res.length; i++) {
assertArrayEquals(res[i], records.get(i).values()); assertArrayEquals(res[i], records.get(i).values());
}
} }
parser.close();
} }
} }
@ -332,45 +325,45 @@ public class CSVParserTest {
{ "world", "" } }; { "world", "" } };
for (final String code : codes) { for (final String code : codes) {
final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
final List<CSVRecord> records = parser.getRecords();
assertEquals(res.length, records.size());
assertTrue(records.size() > 0);
for (int i = 0; i < res.length; i++) {
assertArrayEquals(res[i], records.get(i).values());
}
}
}
}
@Test
public void testExcelFormat1() throws IOException {
final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," +
"\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n";
final String[][] res = { { "value1", "value2", "value3", "value4" }, { "a", "b", "c", "d" },
{ " x", "", "", "" }, { "" }, { "\"hello\"", " \"world\"", "abc\ndef", "" } };
try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(res.length, records.size()); assertEquals(res.length, records.size());
assertTrue(records.size() > 0); assertTrue(records.size() > 0);
for (int i = 0; i < res.length; i++) { for (int i = 0; i < res.length; i++) {
assertArrayEquals(res[i], records.get(i).values()); assertArrayEquals(res[i], records.get(i).values());
} }
parser.close();
} }
} }
@Test
public void testExcelFormat1() throws IOException {
final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,,"
+ "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n";
final String[][] res = { { "value1", "value2", "value3", "value4" }, { "a", "b", "c", "d" },
{ " x", "", "", "" }, { "" }, { "\"hello\"", " \"world\"", "abc\ndef", "" } };
final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL);
final List<CSVRecord> records = parser.getRecords();
assertEquals(res.length, records.size());
assertTrue(records.size() > 0);
for (int i = 0; i < res.length; i++) {
assertArrayEquals(res[i], records.get(i).values());
}
parser.close();
}
@Test @Test
public void testExcelFormat2() throws Exception { public void testExcelFormat2() throws Exception {
final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n"; final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
final String[][] res = { { "foo", "baar" }, { "" }, { "hello", "" }, { "" }, { "world", "" } }; final String[][] res = { { "foo", "baar" }, { "" }, { "hello", "" }, { "" }, { "world", "" } };
final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(res.length, records.size()); assertEquals(res.length, records.size());
assertTrue(records.size() > 0); assertTrue(records.size() > 0);
for (int i = 0; i < res.length; i++) { for (int i = 0; i < res.length; i++) {
assertArrayEquals(res[i], records.get(i).values()); assertArrayEquals(res[i], records.get(i).values());
}
} }
parser.close();
} }
/** /**
@ -379,67 +372,63 @@ public class CSVParserTest {
@Test @Test
public void testExcelHeaderCountLessThanData() throws Exception { public void testExcelHeaderCountLessThanData() throws Exception {
final String code = "A,B,C,,\r\na,b,c,d,e\r\n"; final String code = "A,B,C,,\r\na,b,c,d,e\r\n";
final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader()); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader())) {
try {
for (final CSVRecord record : parser.getRecords()) { for (final CSVRecord record : parser.getRecords()) {
Assert.assertEquals("a", record.get("A")); Assert.assertEquals("a", record.get("A"));
Assert.assertEquals("b", record.get("B")); Assert.assertEquals("b", record.get("B"));
Assert.assertEquals("c", record.get("C")); Assert.assertEquals("c", record.get("C"));
} }
} finally {
parser.close();
} }
} }
@Test @Test
public void testForEach() throws Exception { public void testForEach() throws Exception {
final List<CSVRecord> records = new ArrayList<>(); final List<CSVRecord> records = new ArrayList<>();
try (final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z")) {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); for (final CSVRecord record : CSVFormat.DEFAULT.parse(in)) {
records.add(record);
for (final CSVRecord record : CSVFormat.DEFAULT.parse(in)) { }
records.add(record); assertEquals(3, records.size());
assertArrayEquals(new String[] { "a", "b", "c" }, records.get(0).values());
assertArrayEquals(new String[] { "1", "2", "3" }, records.get(1).values());
assertArrayEquals(new String[] { "x", "y", "z" }, records.get(2).values());
} }
assertEquals(3, records.size());
assertArrayEquals(new String[] { "a", "b", "c" }, records.get(0).values());
assertArrayEquals(new String[] { "1", "2", "3" }, records.get(1).values());
assertArrayEquals(new String[] { "x", "y", "z" }, records.get(2).values());
} }
@Test @Test
public void testGetHeaderMap() throws Exception { public void testGetHeaderMap() throws Exception {
final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader("A", "B", "C")); try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z",
final Map<String, Integer> headerMap = parser.getHeaderMap(); CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
final Iterator<String> columnNames = headerMap.keySet().iterator(); final Map<String, Integer> headerMap = parser.getHeaderMap();
// Headers are iterated in column order. final Iterator<String> columnNames = headerMap.keySet().iterator();
Assert.assertEquals("A", columnNames.next()); // Headers are iterated in column order.
Assert.assertEquals("B", columnNames.next()); Assert.assertEquals("A", columnNames.next());
Assert.assertEquals("C", columnNames.next()); Assert.assertEquals("B", columnNames.next());
final Iterator<CSVRecord> records = parser.iterator(); Assert.assertEquals("C", columnNames.next());
final Iterator<CSVRecord> records = parser.iterator();
// Parse to make sure getHeaderMap did not have a side-effect. // Parse to make sure getHeaderMap did not have a side-effect.
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
assertTrue(records.hasNext()); assertTrue(records.hasNext());
final CSVRecord record = records.next(); final CSVRecord record = records.next();
assertEquals(record.get(0), record.get("A")); assertEquals(record.get(0), record.get("A"));
assertEquals(record.get(1), record.get("B")); assertEquals(record.get(1), record.get("B"));
assertEquals(record.get(2), record.get("C")); assertEquals(record.get(2), record.get("C"));
}
assertFalse(records.hasNext());
} }
assertFalse(records.hasNext());
parser.close();
} }
@Test @Test
public void testGetLine() throws IOException { public void testGetLine() throws IOException {
final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces()); try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
for (final String[] re : RESULT) { for (final String[] re : RESULT) {
assertArrayEquals(re, parser.nextRecord().values()); assertArrayEquals(re, parser.nextRecord().values());
} }
assertNull(parser.nextRecord()); assertNull(parser.nextRecord());
parser.close(); }
} }
@Test @Test
@ -459,10 +448,10 @@ public class CSVParserTest {
@Test @Test
public void testGetOneLine() throws IOException { public void testGetOneLine() throws IOException {
final CSVParser parser = CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse(CSV_INPUT_1, CSVFormat.DEFAULT)) {
final CSVRecord record = parser.getRecords().get(0); final CSVRecord record = parser.getRecords().get(0);
assertArrayEquals(RESULT[0], record.values()); assertArrayEquals(RESULT[0], record.values());
parser.close(); }
} }
/** /**
@ -472,11 +461,9 @@ public class CSVParserTest {
*/ */
@Test @Test
public void testGetOneLineOneParser() throws IOException { public void testGetOneLineOneParser() throws IOException {
final PipedWriter writer = new PipedWriter();
final PipedReader reader = new PipedReader(writer);
final CSVFormat format = CSVFormat.DEFAULT; final CSVFormat format = CSVFormat.DEFAULT;
final CSVParser parser = new CSVParser(reader, format); try (final PipedWriter writer = new PipedWriter();
try { final CSVParser parser = new CSVParser(new PipedReader(writer), format)) {
writer.append(CSV_INPUT_1); writer.append(CSV_INPUT_1);
writer.append(format.getRecordSeparator()); writer.append(format.getRecordSeparator());
final CSVRecord record1 = parser.nextRecord(); final CSVRecord record1 = parser.nextRecord();
@ -485,8 +472,6 @@ public class CSVParserTest {
writer.append(format.getRecordSeparator()); writer.append(format.getRecordSeparator());
final CSVRecord record2 = parser.nextRecord(); final CSVRecord record2 = parser.nextRecord();
assertArrayEquals(RESULT[1], record2.values()); assertArrayEquals(RESULT[1], record2.values());
} finally {
parser.close();
} }
} }
@ -517,39 +502,40 @@ public class CSVParserTest {
@Test @Test
public void testGetRecords() throws IOException { public void testGetRecords() throws IOException {
final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces()); try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(RESULT.length, records.size()); assertEquals(RESULT.length, records.size());
assertTrue(records.size() > 0); assertTrue(records.size() > 0);
for (int i = 0; i < RESULT.length; i++) { for (int i = 0; i < RESULT.length; i++) {
assertArrayEquals(RESULT[i], records.get(i).values()); assertArrayEquals(RESULT[i], records.get(i).values());
}
} }
parser.close();
} }
@Test @Test
public void testGetRecordWithMultiLineValues() throws Exception { public void testGetRecordWithMultiLineValues() throws Exception {
final CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + try (final CSVParser parser = CSVParser.parse(
"\"c\r\n1\",\"c\r\n2\"", CSVFormat.DEFAULT.withRecordSeparator(CRLF)); "\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"",
CSVRecord record; CSVFormat.DEFAULT.withRecordSeparator(CRLF))) {
assertEquals(0, parser.getRecordNumber()); CSVRecord record;
assertEquals(0, parser.getCurrentLineNumber()); assertEquals(0, parser.getRecordNumber());
assertNotNull(record = parser.nextRecord()); assertEquals(0, parser.getCurrentLineNumber());
assertEquals(3, parser.getCurrentLineNumber()); assertNotNull(record = parser.nextRecord());
assertEquals(1, record.getRecordNumber()); assertEquals(3, parser.getCurrentLineNumber());
assertEquals(1, parser.getRecordNumber()); assertEquals(1, record.getRecordNumber());
assertNotNull(record = parser.nextRecord()); assertEquals(1, parser.getRecordNumber());
assertEquals(6, parser.getCurrentLineNumber()); assertNotNull(record = parser.nextRecord());
assertEquals(2, record.getRecordNumber()); assertEquals(6, parser.getCurrentLineNumber());
assertEquals(2, parser.getRecordNumber()); assertEquals(2, record.getRecordNumber());
assertNotNull(record = parser.nextRecord()); assertEquals(2, parser.getRecordNumber());
assertEquals(8, parser.getCurrentLineNumber()); assertNotNull(record = parser.nextRecord());
assertEquals(3, record.getRecordNumber()); assertEquals(8, parser.getCurrentLineNumber());
assertEquals(3, parser.getRecordNumber()); assertEquals(3, record.getRecordNumber());
assertNull(record = parser.nextRecord()); assertEquals(3, parser.getRecordNumber());
assertEquals(8, parser.getCurrentLineNumber()); assertNull(record = parser.nextRecord());
assertEquals(3, parser.getRecordNumber()); assertEquals(8, parser.getCurrentLineNumber());
parser.close(); assertEquals(3, parser.getRecordNumber());
}
} }
@Test @Test
@ -636,16 +622,18 @@ public class CSVParserTest {
final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
// String code = "world\r\n\n"; // String code = "world\r\n\n";
// String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n"; // String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n";
final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(3, records.size()); assertEquals(3, records.size());
parser.close(); }
} }
@Test(expected = IllegalArgumentException.class) @Test(expected = IllegalArgumentException.class)
public void testInvalidFormat() throws Exception { public void testInvalidFormat() throws Exception {
final CSVFormat invalidFormat = CSVFormat.DEFAULT.withDelimiter(CR); final CSVFormat invalidFormat = CSVFormat.DEFAULT.withDelimiter(CR);
new CSVParser(null, invalidFormat).close(); try (final CSVParser parser = new CSVParser(null, invalidFormat)) {
Assert.fail("This test should have thrown an exception.");
}
} }
@Test @Test
@ -680,17 +668,17 @@ public class CSVParserTest {
@Test @Test
public void testLineFeedEndings() throws IOException { public void testLineFeedEndings() throws IOException {
final String code = "foo\nbaar,\nhello,world\n,kanu"; final String code = "foo\nbaar,\nhello,world\n,kanu";
final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(4, records.size()); assertEquals(4, records.size());
parser.close(); }
} }
@Test @Test
public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception { public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2\nx,y,z"); final Reader in = new StringReader("a,b,c\n1,2\nx,y,z");
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord() final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord().parse(in)
.parse(in).iterator(); .iterator();
CSVRecord record; CSVRecord record;
// 1st record // 1st record
@ -724,38 +712,41 @@ public class CSVParserTest {
@Test @Test
// TODO this may lead to strange behavior, throw an exception if iterator() has already been called? // TODO this may lead to strange behavior, throw an exception if iterator() has already been called?
public void testMultipleIterators() throws Exception { public void testMultipleIterators() throws Exception {
final CSVParser parser = CSVParser.parse("a,b,c" + CR + "d,e,f", CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse("a,b,c" + CR + "d,e,f", CSVFormat.DEFAULT)) {
final Iterator<CSVRecord> itr1 = parser.iterator();
final Iterator<CSVRecord> itr2 = parser.iterator();
final Iterator<CSVRecord> itr1 = parser.iterator(); final CSVRecord first = itr1.next();
final Iterator<CSVRecord> itr2 = parser.iterator(); assertEquals("a", first.get(0));
assertEquals("b", first.get(1));
assertEquals("c", first.get(2));
final CSVRecord first = itr1.next(); final CSVRecord second = itr2.next();
assertEquals("a", first.get(0)); assertEquals("d", second.get(0));
assertEquals("b", first.get(1)); assertEquals("e", second.get(1));
assertEquals("c", first.get(2)); assertEquals("f", second.get(2));
}
final CSVRecord second = itr2.next();
assertEquals("d", second.get(0));
assertEquals("e", second.get(1));
assertEquals("f", second.get(2));
parser.close();
} }
@Test(expected = IllegalArgumentException.class) @Test(expected = IllegalArgumentException.class)
public void testNewCSVParserNullReaderFormat() throws Exception { public void testNewCSVParserNullReaderFormat() throws Exception {
new CSVParser(null, CSVFormat.DEFAULT).close(); try (final CSVParser parser = new CSVParser(null, CSVFormat.DEFAULT)) {
Assert.fail("This test should have thrown an exception.");
}
} }
@Test(expected = IllegalArgumentException.class) @Test(expected = IllegalArgumentException.class)
public void testNewCSVParserReaderNullFormat() throws Exception { public void testNewCSVParserReaderNullFormat() throws Exception {
new CSVParser(new StringReader(""), null).close(); try (final CSVParser parser = new CSVParser(new StringReader(""), null)) {
Assert.fail("This test should have thrown an exception.");
}
} }
@Test @Test
public void testNoHeaderMap() throws Exception { public void testNoHeaderMap() throws Exception {
final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", CSVFormat.DEFAULT)) {
Assert.assertNull(parser.getHeaderMap()); Assert.assertNull(parser.getHeaderMap());
parser.close(); }
} }
@Test(expected = IllegalArgumentException.class) @Test(expected = IllegalArgumentException.class)
@ -780,8 +771,9 @@ public class CSVParserTest {
@Test(expected = IllegalArgumentException.class) @Test(expected = IllegalArgumentException.class)
public void testParserUrlNullCharsetFormat() throws Exception { public void testParserUrlNullCharsetFormat() throws Exception {
final CSVParser parser = CSVParser.parse(new URL("http://commons.apache.org"), null, CSVFormat.DEFAULT); try (final CSVParser parser = CSVParser.parse(new URL("http://commons.apache.org"), null, CSVFormat.DEFAULT)) {
parser.close(); Assert.fail("This test should have thrown an exception.");
}
} }
@Test(expected = IllegalArgumentException.class) @Test(expected = IllegalArgumentException.class)
@ -791,8 +783,9 @@ public class CSVParserTest {
@Test(expected = IllegalArgumentException.class) @Test(expected = IllegalArgumentException.class)
public void testParseUrlCharsetNullFormat() throws Exception { public void testParseUrlCharsetNullFormat() throws Exception {
final CSVParser parser = CSVParser.parse(new URL("http://commons.apache.org"), Charset.defaultCharset(), null); try (final CSVParser parser = CSVParser.parse(new URL("http://commons.apache.org"), Charset.defaultCharset(), null)) {
parser.close(); Assert.fail("This test should have thrown an exception.");
}
} }
@Test @Test
@ -840,13 +833,13 @@ public class CSVParserTest {
@Test @Test
public void testRoundtrip() throws Exception { public void testRoundtrip() throws Exception {
final StringWriter out = new StringWriter(); final StringWriter out = new StringWriter();
final CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT); try (final CSVPrinter printer = new CSVPrinter(out, CSVFormat.DEFAULT)) {
final String input = "a,b,c\r\n1,2,3\r\nx,y,z\r\n"; final String input = "a,b,c\r\n1,2,3\r\nx,y,z\r\n";
for (final CSVRecord record : CSVParser.parse(input, CSVFormat.DEFAULT)) { for (final CSVRecord record : CSVParser.parse(input, CSVFormat.DEFAULT)) {
printer.printRecord(record); printer.printRecord(record);
}
assertEquals(input, out.toString());
} }
assertEquals(input, out.toString());
printer.close();
} }
@Test @Test
@ -862,8 +855,8 @@ public class CSVParserTest {
@Test @Test
public void testSkipHeaderOverrideDuplicateHeaders() throws Exception { public void testSkipHeaderOverrideDuplicateHeaders() throws Exception {
final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z"); final Reader in = new StringReader("a,a,a\n1,2,3\nx,y,z");
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord() final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)
.parse(in).iterator(); .iterator();
final CSVRecord record = records.next(); final CSVRecord record = records.next();
assertEquals("1", record.get("X")); assertEquals("1", record.get("X"));
assertEquals("2", record.get("Y")); assertEquals("2", record.get("Y"));
@ -873,8 +866,8 @@ public class CSVParserTest {
@Test @Test
public void testSkipSetAltHeaders() throws Exception { public void testSkipSetAltHeaders() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord() final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().parse(in)
.parse(in).iterator(); .iterator();
final CSVRecord record = records.next(); final CSVRecord record = records.next();
assertEquals("1", record.get("X")); assertEquals("1", record.get("X"));
assertEquals("2", record.get("Y")); assertEquals("2", record.get("Y"));
@ -884,8 +877,8 @@ public class CSVParserTest {
@Test @Test
public void testSkipSetHeader() throws Exception { public void testSkipSetHeader() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z"); final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord() final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord().parse(in)
.parse(in).iterator(); .iterator();
final CSVRecord record = records.next(); final CSVRecord record = records.next();
assertEquals("1", record.get("a")); assertEquals("1", record.get("a"));
assertEquals("2", record.get("b")); assertEquals("2", record.get("b"));
@ -895,27 +888,27 @@ public class CSVParserTest {
@Test @Test
@Ignore @Ignore
public void testStartWithEmptyLinesThenHeaders() throws Exception { public void testStartWithEmptyLinesThenHeaders() throws Exception {
final String[] codes = {"\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", final String[] codes = { "\r\n\r\n\r\nhello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n",
"hello,\"\"\n\n\n"}; "hello,\"\"\n\n\n" };
final String[][] res = {{"hello", ""}, {""}, // Excel format does not ignore empty lines final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines
{""}}; { "" } };
for (final String code : codes) { for (final String code : codes) {
final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL); try (final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL)) {
final List<CSVRecord> records = parser.getRecords(); final List<CSVRecord> records = parser.getRecords();
assertEquals(res.length, records.size()); assertEquals(res.length, records.size());
assertTrue(records.size() > 0); assertTrue(records.size() > 0);
for (int i = 0; i < res.length; i++) { for (int i = 0; i < res.length; i++) {
assertArrayEquals(res[i], records.get(i).values()); assertArrayEquals(res[i], records.get(i).values());
}
} }
parser.close();
} }
} }
@Test @Test
public void testTrailingDelimiter() throws Exception { public void testTrailingDelimiter() throws Exception {
final Reader in = new StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,"); final Reader in = new StringReader("a,a,a,\n\"1\",\"2\",\"3\",\nx,y,z,");
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrailingDelimiter() final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord()
.parse(in).iterator(); .withTrailingDelimiter().parse(in).iterator();
final CSVRecord record = records.next(); final CSVRecord record = records.next();
assertEquals("1", record.get("X")); assertEquals("1", record.get("X"));
assertEquals("2", record.get("Y")); assertEquals("2", record.get("Y"));
@ -926,8 +919,8 @@ public class CSVParserTest {
@Test @Test
public void testTrim() throws Exception { public void testTrim() throws Exception {
final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z"); final Reader in = new StringReader("a,a,a\n\" 1 \",\" 2 \",\" 3 \"\nx,y,z");
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord().withTrim() final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("X", "Y", "Z").withSkipHeaderRecord()
.parse(in).iterator(); .withTrim().parse(in).iterator();
final CSVRecord record = records.next(); final CSVRecord record = records.next();
assertEquals("1", record.get("X")); assertEquals("1", record.get("X"));
assertEquals("2", record.get("Y")); assertEquals("2", record.get("Y"));
@ -936,46 +929,46 @@ public class CSVParserTest {
} }
private void validateLineNumbers(final String lineSeparator) throws IOException { private void validateLineNumbers(final String lineSeparator) throws IOException {
final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c",
CSVFormat.DEFAULT.withRecordSeparator(lineSeparator)); CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {
assertEquals(0, parser.getCurrentLineNumber()); assertEquals(0, parser.getCurrentLineNumber());
assertNotNull(parser.nextRecord()); assertNotNull(parser.nextRecord());
assertEquals(1, parser.getCurrentLineNumber()); assertEquals(1, parser.getCurrentLineNumber());
assertNotNull(parser.nextRecord()); assertNotNull(parser.nextRecord());
assertEquals(2, parser.getCurrentLineNumber()); assertEquals(2, parser.getCurrentLineNumber());
assertNotNull(parser.nextRecord()); assertNotNull(parser.nextRecord());
// Still 2 because the last line is does not have EOL chars // Still 2 because the last line is does not have EOL chars
assertEquals(2, parser.getCurrentLineNumber()); assertEquals(2, parser.getCurrentLineNumber());
assertNull(parser.nextRecord()); assertNull(parser.nextRecord());
// Still 2 because the last line is does not have EOL chars // Still 2 because the last line is does not have EOL chars
assertEquals(2, parser.getCurrentLineNumber()); assertEquals(2, parser.getCurrentLineNumber());
parser.close(); }
} }
private void validateRecordNumbers(final String lineSeparator) throws IOException { private void validateRecordNumbers(final String lineSeparator) throws IOException {
final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c",
CSVFormat.DEFAULT.withRecordSeparator(lineSeparator)); CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {
CSVRecord record; CSVRecord record;
assertEquals(0, parser.getRecordNumber()); assertEquals(0, parser.getRecordNumber());
assertNotNull(record = parser.nextRecord()); assertNotNull(record = parser.nextRecord());
assertEquals(1, record.getRecordNumber()); assertEquals(1, record.getRecordNumber());
assertEquals(1, parser.getRecordNumber()); assertEquals(1, parser.getRecordNumber());
assertNotNull(record = parser.nextRecord()); assertNotNull(record = parser.nextRecord());
assertEquals(2, record.getRecordNumber()); assertEquals(2, record.getRecordNumber());
assertEquals(2, parser.getRecordNumber()); assertEquals(2, parser.getRecordNumber());
assertNotNull(record = parser.nextRecord()); assertNotNull(record = parser.nextRecord());
assertEquals(3, record.getRecordNumber()); assertEquals(3, record.getRecordNumber());
assertEquals(3, parser.getRecordNumber()); assertEquals(3, parser.getRecordNumber());
assertNull(record = parser.nextRecord()); assertNull(record = parser.nextRecord());
assertEquals(3, parser.getRecordNumber()); assertEquals(3, parser.getRecordNumber());
parser.close(); }
} }
private void validateRecordPosition(final String lineSeparator) throws IOException { private void validateRecordPosition(final String lineSeparator) throws IOException {
final String nl = lineSeparator; // used as linebreak in values for better distinction final String nl = lineSeparator; // used as linebreak in values for better distinction
final String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator + final String code = "a,b,c" + lineSeparator + "1,2,3" + lineSeparator +
// to see if recordPosition correctly points to the enclosing quote // to see if recordPosition correctly points to the enclosing quote
"'A" + nl + "A','B" + nl + "B',CC" + lineSeparator + "'A" + nl + "A','B" + nl + "B',CC" + lineSeparator +
// unicode test... not very relevant while operating on strings instead of bytes, but for // unicode test... not very relevant while operating on strings instead of bytes, but for
// completeness... // completeness...

File diff suppressed because it is too large Load Diff

View File

@ -143,16 +143,16 @@ public class CSVRecordTest {
@Test @Test
public void testRemoveAndAddColumns() throws IOException { public void testRemoveAndAddColumns() throws IOException {
// do: // do:
final CSVPrinter printer = new CSVPrinter(new StringBuilder(), CSVFormat.DEFAULT); try (final CSVPrinter printer = new CSVPrinter(new StringBuilder(), CSVFormat.DEFAULT)) {
final Map<String, String> map = recordWithHeader.toMap(); final Map<String, String> map = recordWithHeader.toMap();
map.remove("OldColumn"); map.remove("OldColumn");
map.put("ZColumn", "NewValue"); map.put("ZColumn", "NewValue");
// check: // check:
final ArrayList<String> list = new ArrayList<>(map.values()); final ArrayList<String> list = new ArrayList<>(map.values());
Collections.sort(list); Collections.sort(list);
printer.printRecord(list); printer.printRecord(list);
Assert.assertEquals("A,B,C,NewValue" + CSVFormat.DEFAULT.getRecordSeparator(), printer.getOut().toString()); Assert.assertEquals("A,B,C,NewValue" + CSVFormat.DEFAULT.getRecordSeparator(), printer.getOut().toString());
printer.close(); }
} }
@Test @Test
@ -163,18 +163,20 @@ public class CSVRecordTest {
@Test @Test
public void testToMapWithShortRecord() throws Exception { public void testToMapWithShortRecord() throws Exception {
final CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C")); try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
final CSVRecord shortRec = parser.iterator().next(); final CSVRecord shortRec = parser.iterator().next();
shortRec.toMap(); shortRec.toMap();
}
} }
@Test @Test
public void testToMapWithNoHeader() throws Exception { public void testToMapWithNoHeader() throws Exception {
final CSVParser parser = CSVParser.parse("a,b", CSVFormat.newFormat(',')); try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.newFormat(','))) {
final CSVRecord shortRec = parser.iterator().next(); final CSVRecord shortRec = parser.iterator().next();
final Map<String, String> map = shortRec.toMap(); final Map<String, String> map = shortRec.toMap();
assertNotNull("Map is not null.", map); assertNotNull("Map is not null.", map);
assertTrue("Map is empty.", map.isEmpty()); assertTrue("Map is empty.", map.isEmpty());
}
} }
private void validateMap(final Map<String, String> map, final boolean allowsNulls) { private void validateMap(final Map<String, String> map, final boolean allowsNulls) {

View File

@ -36,72 +36,72 @@ public class ExtendedBufferedReaderTest {
@Test @Test
public void testEmptyInput() throws Exception { public void testEmptyInput() throws Exception {
final ExtendedBufferedReader br = getBufferedReader(""); try (final ExtendedBufferedReader br = createBufferedReader("")) {
assertEquals(END_OF_STREAM, br.read()); assertEquals(END_OF_STREAM, br.read());
assertEquals(END_OF_STREAM, br.lookAhead()); assertEquals(END_OF_STREAM, br.lookAhead());
assertEquals(END_OF_STREAM, br.getLastChar()); assertEquals(END_OF_STREAM, br.getLastChar());
assertNull(br.readLine()); assertNull(br.readLine());
assertEquals(0, br.read(new char[10], 0, 0)); assertEquals(0, br.read(new char[10], 0, 0));
br.close(); }
} }
@Test @Test
public void testReadLookahead1() throws Exception { public void testReadLookahead1() throws Exception {
final ExtendedBufferedReader br = getBufferedReader("1\n2\r3\n"); try (final ExtendedBufferedReader br = createBufferedReader("1\n2\r3\n")) {
assertEquals(0, br.getCurrentLineNumber()); assertEquals(0, br.getCurrentLineNumber());
assertEquals('1', br.lookAhead()); assertEquals('1', br.lookAhead());
assertEquals(UNDEFINED, br.getLastChar()); assertEquals(UNDEFINED, br.getLastChar());
assertEquals(0, br.getCurrentLineNumber()); assertEquals(0, br.getCurrentLineNumber());
assertEquals('1', br.read()); // Start line 1 assertEquals('1', br.read()); // Start line 1
assertEquals('1', br.getLastChar()); assertEquals('1', br.getLastChar());
assertEquals(1, br.getCurrentLineNumber()); assertEquals(1, br.getCurrentLineNumber());
assertEquals('\n', br.lookAhead()); assertEquals('\n', br.lookAhead());
assertEquals(1, br.getCurrentLineNumber()); assertEquals(1, br.getCurrentLineNumber());
assertEquals('1', br.getLastChar()); assertEquals('1', br.getLastChar());
assertEquals('\n', br.read()); assertEquals('\n', br.read());
assertEquals(1, br.getCurrentLineNumber()); assertEquals(1, br.getCurrentLineNumber());
assertEquals('\n', br.getLastChar()); assertEquals('\n', br.getLastChar());
assertEquals(1, br.getCurrentLineNumber()); assertEquals(1, br.getCurrentLineNumber());
assertEquals('2', br.lookAhead()); assertEquals('2', br.lookAhead());
assertEquals(1, br.getCurrentLineNumber()); assertEquals(1, br.getCurrentLineNumber());
assertEquals('\n', br.getLastChar()); assertEquals('\n', br.getLastChar());
assertEquals(1, br.getCurrentLineNumber()); assertEquals(1, br.getCurrentLineNumber());
assertEquals('2', br.read()); // Start line 2 assertEquals('2', br.read()); // Start line 2
assertEquals(2, br.getCurrentLineNumber()); assertEquals(2, br.getCurrentLineNumber());
assertEquals('2', br.getLastChar()); assertEquals('2', br.getLastChar());
assertEquals('\r', br.lookAhead()); assertEquals('\r', br.lookAhead());
assertEquals(2, br.getCurrentLineNumber()); assertEquals(2, br.getCurrentLineNumber());
assertEquals('2', br.getLastChar()); assertEquals('2', br.getLastChar());
assertEquals('\r', br.read()); assertEquals('\r', br.read());
assertEquals('\r', br.getLastChar()); assertEquals('\r', br.getLastChar());
assertEquals(2, br.getCurrentLineNumber()); assertEquals(2, br.getCurrentLineNumber());
assertEquals('3', br.lookAhead()); assertEquals('3', br.lookAhead());
assertEquals('\r', br.getLastChar()); assertEquals('\r', br.getLastChar());
assertEquals('3', br.read()); // Start line 3 assertEquals('3', br.read()); // Start line 3
assertEquals('3', br.getLastChar()); assertEquals('3', br.getLastChar());
assertEquals(3, br.getCurrentLineNumber()); assertEquals(3, br.getCurrentLineNumber());
assertEquals('\n', br.lookAhead()); assertEquals('\n', br.lookAhead());
assertEquals(3, br.getCurrentLineNumber()); assertEquals(3, br.getCurrentLineNumber());
assertEquals('3', br.getLastChar()); assertEquals('3', br.getLastChar());
assertEquals('\n', br.read()); assertEquals('\n', br.read());
assertEquals(3, br.getCurrentLineNumber()); assertEquals(3, br.getCurrentLineNumber());
assertEquals('\n', br.getLastChar()); assertEquals('\n', br.getLastChar());
assertEquals(3, br.getCurrentLineNumber()); assertEquals(3, br.getCurrentLineNumber());
assertEquals(END_OF_STREAM, br.lookAhead()); assertEquals(END_OF_STREAM, br.lookAhead());
assertEquals('\n', br.getLastChar()); assertEquals('\n', br.getLastChar());
assertEquals(END_OF_STREAM, br.read()); assertEquals(END_OF_STREAM, br.read());
assertEquals(END_OF_STREAM, br.getLastChar()); assertEquals(END_OF_STREAM, br.getLastChar());
assertEquals(END_OF_STREAM, br.read()); assertEquals(END_OF_STREAM, br.read());
assertEquals(END_OF_STREAM, br.lookAhead()); assertEquals(END_OF_STREAM, br.lookAhead());
assertEquals(3, br.getCurrentLineNumber()); assertEquals(3, br.getCurrentLineNumber());
br.close(); }
} }
@Test @Test
@ -109,109 +109,104 @@ public class ExtendedBufferedReaderTest {
final char[] ref = new char[5]; final char[] ref = new char[5];
final char[] res = new char[5]; final char[] res = new char[5];
final ExtendedBufferedReader br = getBufferedReader("abcdefg"); try (final ExtendedBufferedReader br = createBufferedReader("abcdefg")) {
ref[0] = 'a'; ref[0] = 'a';
ref[1] = 'b'; ref[1] = 'b';
ref[2] = 'c'; ref[2] = 'c';
assertEquals(3, br.read(res, 0, 3)); assertEquals(3, br.read(res, 0, 3));
assertArrayEquals(ref, res); assertArrayEquals(ref, res);
assertEquals('c', br.getLastChar()); assertEquals('c', br.getLastChar());
assertEquals('d', br.lookAhead()); assertEquals('d', br.lookAhead());
ref[4] = 'd'; ref[4] = 'd';
assertEquals(1, br.read(res, 4, 1)); assertEquals(1, br.read(res, 4, 1));
assertArrayEquals(ref, res); assertArrayEquals(ref, res);
assertEquals('d', br.getLastChar()); assertEquals('d', br.getLastChar());
br.close(); }
} }
@Test @Test
public void testReadLine() throws Exception { public void testReadLine() throws Exception {
ExtendedBufferedReader br = getBufferedReader(""); try (final ExtendedBufferedReader br = createBufferedReader("")) {
assertNull(br.readLine()); assertNull(br.readLine());
}
br.close(); try (final ExtendedBufferedReader br = createBufferedReader("\n")) {
br = getBufferedReader("\n"); assertEquals("", br.readLine());
assertEquals("",br.readLine()); assertNull(br.readLine());
assertNull(br.readLine()); }
try (final ExtendedBufferedReader br = createBufferedReader("foo\n\nhello")) {
br.close(); assertEquals(0, br.getCurrentLineNumber());
br = getBufferedReader("foo\n\nhello"); assertEquals("foo", br.readLine());
assertEquals(0, br.getCurrentLineNumber()); assertEquals(1, br.getCurrentLineNumber());
assertEquals("foo",br.readLine()); assertEquals("", br.readLine());
assertEquals(1, br.getCurrentLineNumber()); assertEquals(2, br.getCurrentLineNumber());
assertEquals("",br.readLine()); assertEquals("hello", br.readLine());
assertEquals(2, br.getCurrentLineNumber()); assertEquals(3, br.getCurrentLineNumber());
assertEquals("hello",br.readLine()); assertNull(br.readLine());
assertEquals(3, br.getCurrentLineNumber()); assertEquals(3, br.getCurrentLineNumber());
assertNull(br.readLine()); }
assertEquals(3, br.getCurrentLineNumber()); try (final ExtendedBufferedReader br = createBufferedReader("foo\n\nhello")) {
assertEquals('f', br.read());
br.close(); assertEquals('o', br.lookAhead());
br = getBufferedReader("foo\n\nhello"); assertEquals("oo", br.readLine());
assertEquals('f', br.read()); assertEquals(1, br.getCurrentLineNumber());
assertEquals('o', br.lookAhead()); assertEquals('\n', br.lookAhead());
assertEquals("oo",br.readLine()); assertEquals("", br.readLine());
assertEquals(1, br.getCurrentLineNumber()); assertEquals(2, br.getCurrentLineNumber());
assertEquals('\n', br.lookAhead()); assertEquals('h', br.lookAhead());
assertEquals("",br.readLine()); assertEquals("hello", br.readLine());
assertEquals(2, br.getCurrentLineNumber()); assertNull(br.readLine());
assertEquals('h', br.lookAhead()); assertEquals(3, br.getCurrentLineNumber());
assertEquals("hello",br.readLine()); }
assertNull(br.readLine()); try (final ExtendedBufferedReader br = createBufferedReader("foo\rbaar\r\nfoo")) {
assertEquals(3, br.getCurrentLineNumber()); assertEquals("foo", br.readLine());
assertEquals('b', br.lookAhead());
assertEquals("baar", br.readLine());
br.close(); assertEquals('f', br.lookAhead());
br = getBufferedReader("foo\rbaar\r\nfoo"); assertEquals("foo", br.readLine());
assertEquals("foo",br.readLine()); assertNull(br.readLine());
assertEquals('b', br.lookAhead()); }
assertEquals("baar",br.readLine());
assertEquals('f', br.lookAhead());
assertEquals("foo",br.readLine());
assertNull(br.readLine());
br.close();
} }
/* /*
* Test to illustrate https://issues.apache.org/jira/browse/CSV-75 * Test to illustrate https://issues.apache.org/jira/browse/CSV-75
* *
*/ */
@Test @Test
public void testReadChar() throws Exception { public void testReadChar() throws Exception {
final String LF="\n"; final String CR="\r"; final String CRLF=CR+LF; final String LFCR=LF+CR;// easier to read the string below final String LF = "\n";
final String test="a" + LF + "b" + CR + "c" + LF + LF + "d" + CR + CR + "e" + LFCR + "f "+ CRLF; final String CR = "\r";
// EOL eol EOL EOL eol eol EOL+CR EOL final String CRLF = CR + LF;
final String LFCR = LF + CR;// easier to read the string below
final String test = "a" + LF + "b" + CR + "c" + LF + LF + "d" + CR + CR + "e" + LFCR + "f " + CRLF;
// EOL eol EOL EOL eol eol EOL+CR EOL
final int EOLeolct = 9; final int EOLeolct = 9;
ExtendedBufferedReader br;
br = getBufferedReader(test); try (final ExtendedBufferedReader br = createBufferedReader(test)) {
assertEquals(0, br.getCurrentLineNumber()); assertEquals(0, br.getCurrentLineNumber());
while (br.readLine() != null) { while (br.readLine() != null) {
// consume all // consume all
}
assertEquals(EOLeolct, br.getCurrentLineNumber());
} }
assertEquals(EOLeolct, br.getCurrentLineNumber()); try (final ExtendedBufferedReader br = createBufferedReader(test)) {
assertEquals(0, br.getCurrentLineNumber());
br.close(); while (br.read() != -1) {
br = getBufferedReader(test); // consume all
assertEquals(0, br.getCurrentLineNumber()); }
while (br.read() != -1) { assertEquals(EOLeolct, br.getCurrentLineNumber());
// consume all
} }
assertEquals(EOLeolct, br.getCurrentLineNumber()); try (final ExtendedBufferedReader br = createBufferedReader(test)) {
assertEquals(0, br.getCurrentLineNumber());
br.close(); final char[] buff = new char[10];
br = getBufferedReader(test); while (br.read(buff, 0, 3) != -1) {
assertEquals(0, br.getCurrentLineNumber()); // consume all
final char[] buff = new char[10]; }
while (br.read(buff, 0, 3) != -1) { assertEquals(EOLeolct, br.getCurrentLineNumber());
// consume all
} }
assertEquals(EOLeolct, br.getCurrentLineNumber());
br.close();
} }
private ExtendedBufferedReader getBufferedReader(final String s) { private ExtendedBufferedReader createBufferedReader(final String s) {
return new ExtendedBufferedReader(new StringReader(s)); return new ExtendedBufferedReader(new StringReader(s));
} }
} }

View File

@ -52,345 +52,341 @@ public class LexerTest {
formatWithEscaping = CSVFormat.DEFAULT.withEscape('\\'); formatWithEscaping = CSVFormat.DEFAULT.withEscape('\\');
} }
private Lexer getLexer(final String input, final CSVFormat format) { private Lexer createLexer(final String input, final CSVFormat format) {
return new Lexer(format, new ExtendedBufferedReader(new StringReader(input))); return new Lexer(format, new ExtendedBufferedReader(new StringReader(input)));
} }
@Test @Test
public void testSurroundingSpacesAreDeleted() throws IOException { public void testSurroundingSpacesAreDeleted() throws IOException {
final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,"; final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,";
final Lexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces()); try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingSpaces")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingSpaces"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, "")); assertThat(parser.nextToken(new Token()), matches(EOF, ""));
}
} }
@Test @Test
public void testSurroundingTabsAreDeleted() throws IOException { public void testSurroundingTabsAreDeleted() throws IOException {
final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,"; final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,";
final Lexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces()); try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingTabs")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingTabs"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, "")); assertThat(parser.nextToken(new Token()), matches(EOF, ""));
}
} }
@Test @Test
public void testIgnoreEmptyLines() throws IOException { public void testIgnoreEmptyLines() throws IOException {
final String code = final String code = "first,line,\n" + "\n" + "\n" + "second,line\n" + "\n" + "\n" + "third line \n" + "\n" +
"first,line,\n"+ "\n" + "last, line \n" + "\n" + "\n" + "\n";
"\n"+
"\n"+
"second,line\n"+
"\n"+
"\n"+
"third line \n"+
"\n"+
"\n"+
"last, line \n"+
"\n"+
"\n"+
"\n";
final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines(); final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines();
final Lexer parser = getLexer(code, format); try (final Lexer parser = createLexer(code, format)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "first"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "first")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); assertThat(parser.nextToken(new Token()), matches(EORECORD, ""));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "second"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "second")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "line"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "line")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "third line "));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "third line ")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "last"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "last")); assertThat(parser.nextToken(new Token()), matches(EORECORD, " line "));
assertThat(parser.nextToken(new Token()), matches(EORECORD, " line ")); assertThat(parser.nextToken(new Token()), matches(EOF, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, "")); assertThat(parser.nextToken(new Token()), matches(EOF, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, "")); }
} }
@Test @Test
public void testComments() throws IOException { public void testComments() throws IOException {
final String code = final String code = "first,line,\n" + "second,line,tokenWith#no-comment\n" + "# comment line \n" +
"first,line,\n"+ "third,line,#no-comment\n" + "# penultimate comment\n" + "# Final comment\n";
"second,line,tokenWith#no-comment\n"+
"# comment line \n"+
"third,line,#no-comment\n"+
"# penultimate comment\n"+
"# Final comment\n";
final CSVFormat format = CSVFormat.DEFAULT.withCommentMarker('#'); final CSVFormat format = CSVFormat.DEFAULT.withCommentMarker('#');
final Lexer parser = getLexer(code, format); try (final Lexer parser = createLexer(code, format)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "first"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "first")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); assertThat(parser.nextToken(new Token()), matches(EORECORD, ""));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "second"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "second")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "tokenWith#no-comment"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "tokenWith#no-comment")); assertThat(parser.nextToken(new Token()), matches(COMMENT, "comment line"));
assertThat(parser.nextToken(new Token()), matches(COMMENT, "comment line")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "third"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "third")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "#no-comment"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "#no-comment")); assertThat(parser.nextToken(new Token()), matches(COMMENT, "penultimate comment"));
assertThat(parser.nextToken(new Token()), matches(COMMENT, "penultimate comment")); assertThat(parser.nextToken(new Token()), matches(COMMENT, "Final comment"));
assertThat(parser.nextToken(new Token()), matches(COMMENT, "Final comment")); assertThat(parser.nextToken(new Token()), matches(EOF, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, "")); assertThat(parser.nextToken(new Token()), matches(EOF, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, "")); }
} }
@Test @Test
public void testCommentsAndEmptyLines() throws IOException { public void testCommentsAndEmptyLines() throws IOException {
final String code = final String code = "1,2,3,\n" + // 1
"1,2,3,\n"+ // 1 "\n" + // 1b
"\n"+ // 1b "\n" + // 1c
"\n"+ // 1c "a,b x,c#no-comment\n" + // 2
"a,b x,c#no-comment\n"+ // 2 "#foo\n" + // 3
"#foo\n"+ // 3 "\n" + // 4
"\n"+ // 4 "\n" + // 4b
"\n"+ // 4b "d,e,#no-comment\n" + // 5
"d,e,#no-comment\n"+ // 5 "\n" + // 5b
"\n"+ // 5b "\n" + // 5c
"\n"+ // 5c "# penultimate comment\n" + // 6
"# penultimate comment\n"+ // 6 "\n" + // 6b
"\n"+ // 6b "\n" + // 6c
"\n"+ // 6c "# Final comment\n"; // 7
"# Final comment\n"; // 7
final CSVFormat format = CSVFormat.DEFAULT.withCommentMarker('#').withIgnoreEmptyLines(false); final CSVFormat format = CSVFormat.DEFAULT.withCommentMarker('#').withIgnoreEmptyLines(false);
assertFalse("Should not ignore empty lines", format.getIgnoreEmptyLines()); assertFalse("Should not ignore empty lines", format.getIgnoreEmptyLines());
final Lexer parser = getLexer(code, format); try (final Lexer parser = createLexer(code, format)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "1"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "2"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "1")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "3"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "2")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 1
assertThat(parser.nextToken(new Token()), matches(TOKEN, "3")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 1b
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 1 assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 1c
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 1b assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 1c assertThat(parser.nextToken(new Token()), matches(TOKEN, "b x"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "c#no-comment")); // 2
assertThat(parser.nextToken(new Token()), matches(TOKEN, "b x")); assertThat(parser.nextToken(new Token()), matches(COMMENT, "foo")); // 3
assertThat(parser.nextToken(new Token()), matches(EORECORD, "c#no-comment")); // 2 assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 4
assertThat(parser.nextToken(new Token()), matches(COMMENT, "foo")); // 3 assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 4b
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 4 assertThat(parser.nextToken(new Token()), matches(TOKEN, "d"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 4b assertThat(parser.nextToken(new Token()), matches(TOKEN, "e"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "d")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "#no-comment")); // 5
assertThat(parser.nextToken(new Token()), matches(TOKEN, "e")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 5b
assertThat(parser.nextToken(new Token()), matches(EORECORD, "#no-comment")); // 5 assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 5c
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 5b assertThat(parser.nextToken(new Token()), matches(COMMENT, "penultimate comment")); // 6
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 5c assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 6b
assertThat(parser.nextToken(new Token()), matches(COMMENT, "penultimate comment")); // 6 assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 6c
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 6b assertThat(parser.nextToken(new Token()), matches(COMMENT, "Final comment")); // 7
assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); // 6c assertThat(parser.nextToken(new Token()), matches(EOF, ""));
assertThat(parser.nextToken(new Token()), matches(COMMENT, "Final comment")); // 7 assertThat(parser.nextToken(new Token()), matches(EOF, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, "")); }
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
} }
// simple token with escaping not enabled // simple token with escaping not enabled
@Test @Test
public void testBackslashWithoutEscaping() throws IOException { public void testBackslashWithoutEscaping() throws IOException {
/* file: a,\,,b /*
* \,, * file: a,\,,b \,,
*/ */
final String code = "a,\\,,b\\\n\\,,"; final String code = "a,\\,,b\\\n\\,,";
final CSVFormat format = CSVFormat.DEFAULT; final CSVFormat format = CSVFormat.DEFAULT;
assertFalse(format.isEscapeCharacterSet()); assertFalse(format.isEscapeCharacterSet());
final Lexer parser = getLexer(code, format); try (final Lexer parser = createLexer(code, format)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); // an unquoted single backslash is not an escape char
// an unquoted single backslash is not an escape char assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\")); // an unquoted single backslash is not an escape char
// an unquoted single backslash is not an escape char assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); assertThat(parser.nextToken(new Token()), matches(EOF, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, "")); }
} }
// simple token with escaping enabled // simple token with escaping enabled
@Test @Test
public void testBackslashWithEscaping() throws IOException { public void testBackslashWithEscaping() throws IOException {
/* file: a,\,,b /*
* \,, * file: a,\,,b \,,
*/ */
final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne"; final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne";
final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false); final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false);
assertTrue(format.isEscapeCharacterSet()); assertTrue(format.isEscapeCharacterSet());
final Lexer parser = getLexer(code, format); try (final Lexer parser = createLexer(code, format)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ",")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ",")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "\nc"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\nc")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "d\r"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "d\r")); assertThat(parser.nextToken(new Token()), matches(EOF, "e"));
assertThat(parser.nextToken(new Token()), matches(EOF, "e")); }
} }
// encapsulator tokenizer (single line) // encapsulator tokenizer (single line)
@Test @Test
public void testNextToken4() throws IOException { public void testNextToken4() throws IOException {
/* file: a,"foo",b /*
* a, " foo",b * file: a,"foo",b a, " foo",b a,"foo " ,b // whitespace after closing encapsulator a, " foo " ,b
* a,"foo " ,b // whitespace after closing encapsulator */
* a, " foo " ,b
*/
final String code = "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b"; final String code = "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
final Lexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces()); try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "b"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, " foo")); assertThat(parser.nextToken(new Token()), matches(TOKEN, " foo"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "b"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo ")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo "));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "b"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, " foo ")); assertThat(parser.nextToken(new Token()), matches(TOKEN, " foo "));
// assertTokenEquals(EORECORD, "b", parser.nextToken(new Token())); // assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));
assertThat(parser.nextToken(new Token()), matches(EOF, "b")); assertThat(parser.nextToken(new Token()), matches(EOF, "b"));
}
} }
// encapsulator tokenizer (multi line, delimiter in string) // encapsulator tokenizer (multi line, delimiter in string)
@Test @Test
public void testNextToken5() throws IOException { public void testNextToken5() throws IOException {
final String code = "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\""; final String code = "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";
final Lexer parser = getLexer(code, CSVFormat.DEFAULT); try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo\n")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo\n"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "b"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "foo\n baar ,,,")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "foo\n baar ,,,"));
assertThat(parser.nextToken(new Token()), matches(EOF, "\n\t \n")); assertThat(parser.nextToken(new Token()), matches(EOF, "\n\t \n"));
}
} }
// change delimiters, comment, encapsulater // change delimiters, comment, encapsulater
@Test @Test
public void testNextToken6() throws IOException { public void testNextToken6() throws IOException {
/* file: a;'b and \' more /*
* ' * file: a;'b and \' more ' !comment;;;; ;;
* !comment;;;; */
* ;;
*/
final String code = "a;'b and '' more\n'\n!comment;;;;\n;;"; final String code = "a;'b and '' more\n'\n!comment;;;;\n;;";
final CSVFormat format = CSVFormat.DEFAULT.withQuote('\'').withCommentMarker('!').withDelimiter(';'); final CSVFormat format = CSVFormat.DEFAULT.withQuote('\'').withCommentMarker('!').withDelimiter(';');
final Lexer parser = getLexer(code, format); try (final Lexer parser = createLexer(code, format)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b and ' more\n")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "b and ' more\n"));
}
} }
// From CSV-1 // From CSV-1
@Test @Test
public void testDelimiterIsWhitespace() throws IOException { public void testDelimiterIsWhitespace() throws IOException {
final String code = "one\ttwo\t\tfour \t five\t six"; final String code = "one\ttwo\t\tfour \t five\t six";
final Lexer parser = getLexer(code, CSVFormat.TDF); try (final Lexer parser = createLexer(code, CSVFormat.TDF)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "one")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "one"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "two")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "two"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "four")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "four"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "five")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "five"));
assertThat(parser.nextToken(new Token()), matches(EOF, "six")); assertThat(parser.nextToken(new Token()), matches(EOF, "six"));
}
} }
@Test @Test
public void testEscapedCR() throws Exception { public void testEscapedCR() throws Exception {
final Lexer lexer = getLexer("character\\" + CR + "Escaped", formatWithEscaping); try (final Lexer lexer = createLexer("character\\" + CR + "Escaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
}
} }
@Test @Test
public void testCR() throws Exception { public void testCR() throws Exception {
final Lexer lexer = getLexer("character" + CR + "NotEscaped", formatWithEscaping); try (final Lexer lexer = createLexer("character" + CR + "NotEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character")); assertThat(lexer.nextToken(new Token()), hasContent("character"));
assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
}
} }
@Test @Test
public void testEscapedLF() throws Exception { public void testEscapedLF() throws Exception {
final Lexer lexer = getLexer("character\\" + LF + "Escaped", formatWithEscaping); try (final Lexer lexer = createLexer("character\\" + LF + "Escaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped"));
}
} }
@Test @Test
public void testLF() throws Exception { public void testLF() throws Exception {
final Lexer lexer = getLexer("character" + LF + "NotEscaped", formatWithEscaping); try (final Lexer lexer = createLexer("character" + LF + "NotEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character")); assertThat(lexer.nextToken(new Token()), hasContent("character"));
assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
}
} }
@Test // TODO is this correct? Do we expect <esc>TAB to be unescaped? @Test // TODO is this correct? Do we expect <esc>TAB to be unescaped?
public void testEscapedTab() throws Exception { public void testEscapedTab() throws Exception {
final Lexer lexer = getLexer("character\\" + TAB + "Escaped", formatWithEscaping); try (final Lexer lexer = createLexer("character\\" + TAB + "Escaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped"));
}
} }
@Test @Test
public void testTab() throws Exception { public void testTab() throws Exception {
final Lexer lexer = getLexer("character" + TAB + "NotEscaped", formatWithEscaping); try (final Lexer lexer = createLexer("character" + TAB + "NotEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped"));
}
} }
@Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped? @Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped?
public void testEscapedBackspace() throws Exception { public void testEscapedBackspace() throws Exception {
final Lexer lexer = getLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping); try (final Lexer lexer = createLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped"));
}
} }
@Test @Test
public void testBackspace() throws Exception { public void testBackspace() throws Exception {
final Lexer lexer = getLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping); try (final Lexer lexer = createLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped"));
}
} }
@Test // TODO is this correct? Do we expect <esc>FF to be unescaped? @Test // TODO is this correct? Do we expect <esc>FF to be unescaped?
public void testEscapedFF() throws Exception { public void testEscapedFF() throws Exception {
final Lexer lexer = getLexer("character\\" + FF + "Escaped", formatWithEscaping); try (final Lexer lexer = createLexer("character\\" + FF + "Escaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped"));
}
} }
@Test @Test
public void testFF() throws Exception { public void testFF() throws Exception {
final Lexer lexer = getLexer("character" + FF + "NotEscaped", formatWithEscaping); try (final Lexer lexer = createLexer("character" + FF + "NotEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped"));
}
} }
@Test @Test
public void testEscapedMySqlNullValue() throws Exception { public void testEscapedMySqlNullValue() throws Exception {
// MySQL uses \N to symbolize null values. We have to restore this // MySQL uses \N to symbolize null values. We have to restore this
final Lexer lexer = getLexer("character\\NEscaped", formatWithEscaping); try (final Lexer lexer = createLexer("character\\NEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped"));
}
} }
@Test @Test
public void testEscapedCharacter() throws Exception { public void testEscapedCharacter() throws Exception {
final Lexer lexer = getLexer("character\\aEscaped", formatWithEscaping); try (final Lexer lexer = createLexer("character\\aEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped"));
}
} }
@Test @Test
public void testEscapedControlCharacter() throws Exception { public void testEscapedControlCharacter() throws Exception {
// we are explicitly using an escape different from \ here // we are explicitly using an escape different from \ here
final Lexer lexer = getLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!')); try (final Lexer lexer = createLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'))) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
}
} }
@Test @Test
public void testEscapedControlCharacter2() throws Exception { public void testEscapedControlCharacter2() throws Exception {
final Lexer lexer = getLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\')); try (final Lexer lexer = createLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'))) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
}
} }
@Test(expected = IOException.class) @Test(expected = IOException.class)
public void testEscapingAtEOF() throws Exception { public void testEscapingAtEOF() throws Exception {
final String code = "escaping at EOF is evil\\"; final String code = "escaping at EOF is evil\\";
final Lexer lexer = getLexer(code, formatWithEscaping); try (final Lexer lexer = createLexer(code, formatWithEscaping)) {
lexer.nextToken(new Token());
lexer.nextToken(new Token()); }
} }
} }

View File

@ -26,6 +26,7 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -73,11 +74,11 @@ public class PerformanceTest {
System.out.println(String.format("Found test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length())); System.out.println(String.format("Found test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length()));
} else { } else {
System.out.println("Decompressing test fixture " + BIG_FILE + "..."); System.out.println("Decompressing test fixture " + BIG_FILE + "...");
final InputStream input = new GZIPInputStream(new FileInputStream("src/test/resources/perf/worldcitiespop.txt.gz")); try (final InputStream input = new GZIPInputStream(
final OutputStream output = new FileOutputStream(BIG_FILE); new FileInputStream("src/test/resources/perf/worldcitiespop.txt.gz"));
IOUtils.copy(input, output); final OutputStream output = new FileOutputStream(BIG_FILE)) {
input.close(); IOUtils.copy(input, output);
output.close(); }
System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length())); System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length()));
} }
final int argc = args.length; final int argc = args.length;
@ -121,7 +122,7 @@ public class PerformanceTest {
} }
} }
private static BufferedReader getReader() throws IOException { private static BufferedReader createReader() throws IOException {
return new BufferedReader(new FileReader(BIG_FILE)); return new BufferedReader(new FileReader(BIG_FILE));
} }
@ -155,15 +156,17 @@ public class PerformanceTest {
} }
private static void testReadBigFile(final boolean split) throws Exception { private static void testReadBigFile(final boolean split) throws Exception {
for (int i = 0; i < max; i++) { for (int i = 0; i < max; i++) {
final BufferedReader in = getReader(); final long startMillis;
final long t0 = System.currentTimeMillis(); final Stats stats;
final Stats s = readAll(in, split); try (final BufferedReader in = createReader()) {
in.close(); startMillis = System.currentTimeMillis();
show(split?"file+split":"file", s, t0); stats = readAll(in, split);
} }
show(); show(split ? "file+split" : "file", stats, startMillis);
} }
show();
}
private static Stats readAll(final BufferedReader in, final boolean split) throws IOException { private static Stats readAll(final BufferedReader in, final boolean split) throws IOException {
int count = 0; int count = 0;
@ -176,55 +179,58 @@ public class PerformanceTest {
return new Stats(count, fields); return new Stats(count, fields);
} }
private static void testExtendedBuffer(final boolean makeString) throws Exception { private static void testExtendedBuffer(final boolean makeString) throws Exception {
for (int i = 0; i < max; i++) { for (int i = 0; i < max; i++) {
final ExtendedBufferedReader in = new ExtendedBufferedReader(getReader()); int fields = 0;
final long t0 = System.currentTimeMillis(); int lines = 0;
int read; final long startMillis;
int fields = 0; try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) {
int lines = 0; startMillis = System.currentTimeMillis();
if (makeString) { int read;
StringBuilder sb = new StringBuilder(); if (makeString) {
while((read=in.read()) != -1) { StringBuilder sb = new StringBuilder();
sb.append((char)read); while ((read = in.read()) != -1) {
if (read == ',') { // count delimiters sb.append((char) read);
sb.toString(); if (read == ',') { // count delimiters
sb = new StringBuilder(); sb.toString();
fields++; sb = new StringBuilder();
} else if (read == '\n') { fields++;
sb.toString(); } else if (read == '\n') {
sb = new StringBuilder(); sb.toString();
lines++; sb = new StringBuilder();
} lines++;
} }
} else { }
while((read=in.read()) != -1) { } else {
if (read == ',') { // count delimiters while ((read = in.read()) != -1) {
fields++; if (read == ',') { // count delimiters
} else if (read == '\n') { fields++;
lines++; } else if (read == '\n') {
} lines++;
} }
} }
fields += lines; // EOL is a delimiter too }
in.close(); fields += lines; // EOL is a delimiter too
show("Extended"+(makeString?" toString":""), new Stats(lines, fields), t0); }
} show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis);
show(); }
} show();
}
private static void testParseCommonsCSV() throws Exception { private static void testParseCommonsCSV() throws Exception {
for (int i = 0; i < max; i++) { for (int i = 0; i < max; i++) {
final BufferedReader reader = getReader(); final long startMillis;
final CSVParser parser = new CSVParser(reader, format); final Stats stats;
final long t0 = System.currentTimeMillis(); try (final BufferedReader reader = createReader()) {
final Stats s = iterate(parser); try (final CSVParser parser = new CSVParser(reader, format)) {
reader.close(); startMillis = System.currentTimeMillis();
show("CSV", s, t0); stats = iterate(parser);
parser.close(); }
} show("CSV", stats, startMillis);
show(); }
} }
show();
}
private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception { private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
@ -233,53 +239,59 @@ public class PerformanceTest {
return lexer.getConstructor(new Class<?>[]{CSVFormat.class, ExtendedBufferedReader.class}); return lexer.getConstructor(new Class<?>[]{CSVFormat.class, ExtendedBufferedReader.class});
} }
private static void testCSVLexer(final boolean newToken, final String test) throws Exception { private static void testCSVLexer(final boolean newToken, final String test) throws Exception {
Token token = new Token(); Token token = new Token();
String dynamic = ""; String dynamic = "";
for (int i = 0; i < max; i++) { for (int i = 0; i < max; i++) {
final ExtendedBufferedReader input = new ExtendedBufferedReader(getReader()); final String simpleName;
Lexer lexer = null; final Stats stats;
if (test.startsWith("CSVLexer")) { final long startMillis;
dynamic="!"; try (final ExtendedBufferedReader input = new ExtendedBufferedReader(createReader());
lexer = getLexerCtor(test).newInstance(new Object[]{format, input}); Lexer lexer = createTestCSVLexer(test, input)) {
} else { if (test.startsWith("CSVLexer")) {
lexer = new Lexer(format, input); dynamic = "!";
} }
int count = 0; simpleName = lexer.getClass().getSimpleName();
int fields = 0; int count = 0;
final long t0 = System.currentTimeMillis(); int fields = 0;
do { startMillis = System.currentTimeMillis();
if (newToken) { do {
token = new Token(); if (newToken) {
} else { token = new Token();
token.reset(); } else {
} token.reset();
lexer.nextToken(token); }
switch(token.type) { lexer.nextToken(token);
case EOF: switch (token.type) {
break; case EOF:
case EORECORD: break;
fields++; case EORECORD:
count++; fields++;
break; count++;
case INVALID: break;
throw new IOException("invalid parse sequence <"+token.content.toString()+">"); case INVALID:
case TOKEN: throw new IOException("invalid parse sequence <" + token.content.toString() + ">");
fields++; case TOKEN:
break; fields++;
case COMMENT: // not really expecting these break;
break; case COMMENT: // not really expecting these
default: break;
throw new IllegalStateException("Unexpected Token type: " + token.type); default:
} throw new IllegalStateException("Unexpected Token type: " + token.type);
}
} while (!token.type.equals(Token.Type.EOF));
stats = new Stats(count, fields);
}
show(simpleName + dynamic + " " + (newToken ? "new" : "reset"), stats, startMillis);
}
show();
}
} while (!token.type.equals(Token.Type.EOF)); private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input)
final Stats s = new Stats(count, fields); throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception {
input.close(); return test.startsWith("CSVLexer") ? getLexerCtor(test)
show(lexer.getClass().getSimpleName()+dynamic+" "+(newToken ? "new" : "reset"), s, t0); .newInstance(new Object[] { format, input }) : new Lexer(format, input);
} }
show();
}
private static Stats iterate(final Iterable<CSVRecord> it) { private static Stats iterate(final Iterable<CSVRecord> it) {
int count = 0; int count = 0;

View File

@ -29,12 +29,13 @@ public class JiraCsv164Test {
@Test @Test
public void testJiraCsv154_withCommentMarker() throws IOException { public void testJiraCsv154_withCommentMarker() throws IOException {
final String comment = "This is a header comment"; final String comment = "This is a header comment";
final CSVFormat format = CSVFormat.EXCEL.withHeader("H1", "H2").withCommentMarker('#').withHeaderComments(comment); final CSVFormat format = CSVFormat.EXCEL.withHeader("H1", "H2").withCommentMarker('#')
.withHeaderComments(comment);
final StringBuilder out = new StringBuilder(); final StringBuilder out = new StringBuilder();
final CSVPrinter printer = format.print(out); try (final CSVPrinter printer = format.print(out)) {
printer.print("A"); printer.print("A");
printer.print("B"); printer.print("B");
printer.close(); }
final String s = out.toString(); final String s = out.toString();
assertTrue(s, s.contains(comment)); assertTrue(s, s.contains(comment));
} }
@ -42,12 +43,13 @@ public class JiraCsv164Test {
@Test @Test
public void testJiraCsv154_withHeaderComments() throws IOException { public void testJiraCsv154_withHeaderComments() throws IOException {
final String comment = "This is a header comment"; final String comment = "This is a header comment";
final CSVFormat format = CSVFormat.EXCEL.withHeader("H1", "H2").withHeaderComments(comment).withCommentMarker('#'); final CSVFormat format = CSVFormat.EXCEL.withHeader("H1", "H2").withHeaderComments(comment)
.withCommentMarker('#');
final StringBuilder out = new StringBuilder(); final StringBuilder out = new StringBuilder();
final CSVPrinter printer = format.print(out); try (final CSVPrinter printer = format.print(out)) {
printer.print("A"); printer.print("A");
printer.print("B"); printer.print("B");
printer.close(); }
final String s = out.toString(); final String s = out.toString();
assertTrue(s, s.contains(comment)); assertTrue(s, s.contains(comment));
} }

View File

@ -33,23 +33,23 @@ public class JiraCsv167Test {
@Test @Test
public void parse() throws IOException { public void parse() throws IOException {
final BufferedReader br = new BufferedReader(getTestInput());
String s = null;
int totcomment = 0; int totcomment = 0;
int totrecs = 0; int totrecs = 0;
boolean lastWasComment = false; try (final BufferedReader br = new BufferedReader(getTestInput())) {
while((s=br.readLine()) != null) { String s = null;
if (s.startsWith("#")) { boolean lastWasComment = false;
if (!lastWasComment) { // comments are merged while ((s = br.readLine()) != null) {
totcomment++; if (s.startsWith("#")) {
if (!lastWasComment) { // comments are merged
totcomment++;
}
lastWasComment = true;
} else {
totrecs++;
lastWasComment = false;
} }
lastWasComment = true;
} else {
totrecs++;
lastWasComment = false;
} }
} }
br.close();
CSVFormat format = CSVFormat.DEFAULT; CSVFormat format = CSVFormat.DEFAULT;
// //
format = format.withAllowMissingColumnNames(false); format = format.withAllowMissingColumnNames(false);
@ -66,13 +66,14 @@ public class JiraCsv167Test {
format = format.withRecordSeparator('\n'); format = format.withRecordSeparator('\n');
format = format.withSkipHeaderRecord(false); format = format.withSkipHeaderRecord(false);
// //
final CSVParser parser = format.parse(getTestInput());
int comments = 0; int comments = 0;
int records = 0; int records = 0;
for (final CSVRecord csvRecord : parser) { try (final CSVParser parser = format.parse(getTestInput())) {
records++; for (final CSVRecord csvRecord : parser) {
if (csvRecord.hasComment()) { records++;
comments++; if (csvRecord.hasComment()) {
comments++;
}
} }
} }
// Comment lines are concatenated, in this example 4 lines become 2 comments. // Comment lines are concatenated, in this example 4 lines become 2 comments.

View File

@ -56,15 +56,15 @@ public class PerformanceTest {
return; return;
} }
System.out.println("Decompressing test fixture " + BIG_FILE + "..."); System.out.println("Decompressing test fixture " + BIG_FILE + "...");
final InputStream input = new GZIPInputStream(new FileInputStream("src/test/resources/perf/worldcitiespop.txt.gz")); try (final InputStream input = new GZIPInputStream(
final OutputStream output = new FileOutputStream(BIG_FILE); new FileInputStream("src/test/resources/perf/worldcitiespop.txt.gz"));
IOUtils.copy(input, output); final OutputStream output = new FileOutputStream(BIG_FILE)) {
System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length())); IOUtils.copy(input, output);
input.close(); System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length()));
output.close(); }
} }
private BufferedReader getBufferedReader() throws IOException { private BufferedReader createBufferedReader() throws IOException {
return new BufferedReader(new FileReader(BIG_FILE)); return new BufferedReader(new FileReader(BIG_FILE));
} }
@ -96,7 +96,7 @@ public class PerformanceTest {
public long testParseBigFile(final boolean traverseColumns) throws Exception { public long testParseBigFile(final boolean traverseColumns) throws Exception {
final long startMillis = System.currentTimeMillis(); final long startMillis = System.currentTimeMillis();
final long count = this.parse(this.getBufferedReader(), traverseColumns); final long count = this.parse(this.createBufferedReader(), traverseColumns);
final long totalMillis = System.currentTimeMillis() - startMillis; final long totalMillis = System.currentTimeMillis() - startMillis;
this.println(String.format("File parsed in %,d milliseconds with Commons CSV: %,d lines.", totalMillis, count)); this.println(String.format("File parsed in %,d milliseconds with Commons CSV: %,d lines.", totalMillis, count));
return totalMillis; return totalMillis;
@ -115,13 +115,12 @@ public class PerformanceTest {
public void testReadBigFile() throws Exception { public void testReadBigFile() throws Exception {
long bestTime = Long.MAX_VALUE; long bestTime = Long.MAX_VALUE;
for (int i = 0; i < this.max; i++) { for (int i = 0; i < this.max; i++) {
final BufferedReader in = this.getBufferedReader(); final long startMillis;
final long startMillis = System.currentTimeMillis(); long count;
long count = 0; try (final BufferedReader in = this.createBufferedReader()) {
try { startMillis = System.currentTimeMillis();
count = 0;
count = this.readAll(in); count = this.readAll(in);
} finally {
in.close();
} }
final long totalMillis = System.currentTimeMillis() - startMillis; final long totalMillis = System.currentTimeMillis() - startMillis;
bestTime = Math.min(totalMillis, bestTime); bestTime = Math.min(totalMillis, bestTime);