diff --git a/src/changes/changes.xml b/src/changes/changes.xml index b735b174..67681987 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -38,7 +38,9 @@ Release Notes - + + CSVFormat.EXCEL should ignore empty header names + No longer works with Java 6 NullPointerException when empty header string and and null string of "" diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 8cf030d9..0338732f 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -206,16 +206,17 @@ public final class CSVFormat implements Serializable { * Settings are: *

* *

- * Note: this is currently the same as {@link #RFC4180}. + * Note: this is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean) withAllowMissingColumnNames(true)}. *

*/ - public static final CSVFormat EXCEL = DEFAULT.withIgnoreEmptyLines(false); + public static final CSVFormat EXCEL = DEFAULT.withIgnoreEmptyLines(false).withAllowMissingColumnNames(true); /** * Tab-delimited format. diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index adbf9e40..059a7395 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -52,32 +52,24 @@ import org.junit.Test; /** * CSVParserTest * - * The test are organized in three different sections: - * The 'setter/getter' section, the lexer section and finally the parser - * section. In case a test fails, you should follow a top-down approach for - * fixing a potential bug (its likely that the parser itself fails if the lexer - * has problems...). + * The tests are organized in three different sections: The 'setter/getter' section, the lexer section and finally the + * parser section. In case a test fails, you should follow a top-down approach for fixing a potential bug (it's likely + * that the parser itself fails if the lexer has problems...). 
* * @version $Id$ */ public class CSVParserTest { - private static final String CSV_INPUT = "a,b,c,d\n" - + " a , b , 1 2 \n" - + "\"foo baar\", b,\n" - // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n"; - + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping + private static final String CSV_INPUT = "a,b,c,d\n" + " a , b , 1 2 \n" + "\"foo baar\", b,\n" + // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n"; + + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping private static final String CSV_INPUT_1 = "a,b,c,d"; private static final String CSV_INPUT_2 = "a,b,1 2"; - private static final String[][] RESULT = { - {"a", "b", "c", "d"}, - {"a", "b", "1 2"}, - {"foo baar", "b", ""}, - {"foo\n,,\n\",,\n\"", "d", "e"} - }; + private static final String[][] RESULT = { { "a", "b", "c", "d" }, { "a", "b", "1 2" }, { "foo baar", "b", "" }, + { "foo\n,,\n\",,\n\"", "d", "e" } }; @Test public void testBackslashEscaping() throws IOException { @@ -86,34 +78,29 @@ public class CSVParserTest { // We will test with a forward slash as the escape char, and a single // quote as the encapsulator. 
- final String code = - "one,two,three\n" // 0 - + "'',''\n" // 1) empty encapsulators - + "/',/'\n" // 2) single encapsulators - + "'/'','/''\n" // 3) single encapsulators encapsulated via escape - + "'''',''''\n" // 4) single encapsulators encapsulated via doubling - + "/,,/,\n" // 5) separator escaped - + "//,//\n" // 6) escape escaped - + "'//','//'\n" // 7) escape escaped in encapsulation - + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces - + "9, /\n \n" // escaped newline - + ""; - final String[][] res = { - {"one", "two", "three"}, // 0 - {"", ""}, // 1 - {"'", "'"}, // 2 - {"'", "'"}, // 3 - {"'", "'"}, // 4 - {",", ","}, // 5 - {"/", "/"}, // 6 - {"/", "/"}, // 7 - {" 8 ", " \"quoted \"\" /\" / string\" "}, - {"9", " \n "}, - }; + final String code = "one,two,three\n" // 0 + + "'',''\n" // 1) empty encapsulators + + "/',/'\n" // 2) single encapsulators + + "'/'','/''\n" // 3) single encapsulators encapsulated via escape + + "'''',''''\n" // 4) single encapsulators encapsulated via doubling + + "/,,/,\n" // 5) separator escaped + + "//,//\n" // 6) escape escaped + + "'//','//'\n" // 7) escape escaped in encapsulation + + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces + + "9, /\n \n" // escaped newline + + ""; + final String[][] res = { { "one", "two", "three" }, // 0 + { "", "" }, // 1 + { "'", "'" }, // 2 + { "'", "'" }, // 3 + { "'", "'" }, // 4 + { ",", "," }, // 5 + { "/", "/" }, // 6 + { "/", "/" }, // 7 + { " 8 ", " \"quoted \"\" /\" / string\" " }, { "9", " \n " }, }; - - final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'') - .withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines(true); + final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'').withRecordSeparator(CRLF).withEscape('/') + .withIgnoreEmptyLines(true); final CSVParser parser = CSVParser.parse(code, format); final List records = parser.getRecords(); @@ -130,20 +117,17 @@ public class CSVParserTest { // We will test with a forward 
slash as the escape char, and a single // quote as the encapsulator. - final String code = "" - + " , , \n" // 1) - + " \t , , \n" // 2) - + " // , /, , /,\n" // 3) + final String code = "" + " , , \n" // 1) + + " \t , , \n" // 2) + + " // , /, , /,\n" // 3) + ""; - final String[][] res = { - {" ", " ", " "}, // 1 - {" \t ", " ", " "}, // 2 - {" / ", " , ", " ,"}, // 3 + final String[][] res = { { " ", " ", " " }, // 1 + { " \t ", " ", " " }, // 2 + { " / ", " , ", " ," }, // 3 }; - - final CSVFormat format = CSVFormat.newFormat(',') - .withRecordSeparator(CRLF).withEscape('/').withIgnoreEmptyLines(true); + final CSVFormat format = CSVFormat.newFormat(',').withRecordSeparator(CRLF).withEscape('/') + .withIgnoreEmptyLines(true); final CSVParser parser = CSVParser.parse(code, format); final List records = parser.getRecords(); @@ -156,26 +140,13 @@ public class CSVParserTest { @Test @Ignore public void testBackslashEscapingOld() throws IOException { - final String code = - "one,two,three\n" - + "on\\\"e,two\n" - + "on\"e,two\n" - + "one,\"tw\\\"o\"\n" - + "one,\"t\\,wo\"\n" - + "one,two,\"th,ree\"\n" - + "\"a\\\\\"\n" - + "a\\,b\n" - + "\"a\\\\,b\""; - final String[][] res = { - {"one", "two", "three"}, - {"on\\\"e", "two"}, - {"on\"e", "two"}, - {"one", "tw\"o"}, - {"one", "t\\,wo"}, // backslash in quotes only escapes a delimiter (",") - {"one", "two", "th,ree"}, - {"a\\\\"}, // backslash in quotes only escapes a delimiter (",") - {"a\\", "b"}, // a backslash must be returnd - {"a\\\\,b"} // backslash in quotes only escapes a delimiter (",") + final String code = "one,two,three\n" + "on\\\"e,two\n" + "on\"e,two\n" + "one,\"tw\\\"o\"\n" + + "one,\"t\\,wo\"\n" + "one,two,\"th,ree\"\n" + "\"a\\\\\"\n" + "a\\,b\n" + "\"a\\\\,b\""; + final String[][] res = { { "one", "two", "three" }, { "on\\\"e", "two" }, { "on\"e", "two" }, + { "one", "tw\"o" }, { "one", "t\\,wo" }, // backslash in quotes only escapes a delimiter (",") + { "one", "two", "th,ree" }, { "a\\\\" }, // 
backslash in quotes only escapes a delimiter (",") + { "a\\", "b" }, // a backslash must be returned + { "a\\\\,b" } // backslash in quotes only escapes a delimiter (",") }; final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); final List records = parser.getRecords(); @@ -196,7 +167,7 @@ public class CSVParserTest { for (final CSVRecord record : parser) { final String string = record.get("Date"); Assert.assertNotNull(string); - //System.out.println("date: " + record.get("Date")); + // System.out.println("date: " + record.get("Date")); } } finally { parser.close(); @@ -212,7 +183,7 @@ public class CSVParserTest { for (final CSVRecord record : parser) { final String string = record.get("Date"); Assert.assertNotNull(string); - //System.out.println("date: " + record.get("Date")); + // System.out.println("date: " + record.get("Date")); } } finally { parser.close(); @@ -260,18 +231,12 @@ public class CSVParserTest { @Test public void testDefaultFormat() throws IOException { - final String code = "" - + "a,b#\n" // 1) - + "\"\n\",\" \",#\n" // 2) - + "#,\"\"\n" // 3) + final String code = "" + "a,b#\n" // 1) + + "\"\n\",\" \",#\n" // 2) + + "#,\"\"\n" // 3) + "# Final comment\n"// 4) - ; - final String[][] res = { - {"a", "b#"}, - {"\n", " ", "#"}, - {"#", ""}, - {"# Final comment"} - }; + ; + final String[][] res = { { "a", "b#" }, { "\n", " ", "#" }, { "#", "" }, { "# Final comment" } }; CSVFormat format = CSVFormat.DEFAULT; assertFalse(format.isCommentMarkerSet()); @@ -282,10 +247,7 @@ public class CSVParserTest { Utils.compare("Failed to parse without comments", res, records); - final String[][] res_comments = { - {"a", "b#"}, - {"\n", " ", "#"}, - }; + final String[][] res_comments = { { "a", "b#" }, { "\n", " ", "#" }, }; format = CSVFormat.DEFAULT.withCommentMarker('#'); parser.close(); @@ -305,14 +267,8 @@ public class CSVParserTest { @Test public void testEmptyLineBehaviourCSV() throws Exception { - final String[] codes = { - "hello,\r\n\r\n\r\n", - 
"hello,\n\n\n", - "hello,\"\"\r\n\r\n\r\n", - "hello,\"\"\n\n\n" - }; - final String[][] res = { - {"hello", ""} // CSV format ignores empty lines + final String[] codes = { "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" }; + final String[][] res = { { "hello", "" } // CSV format ignores empty lines }; for (final String code : codes) { final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); @@ -328,17 +284,9 @@ public class CSVParserTest { @Test public void testEmptyLineBehaviourExcel() throws Exception { - final String[] codes = { - "hello,\r\n\r\n\r\n", - "hello,\n\n\n", - "hello,\"\"\r\n\r\n\r\n", - "hello,\"\"\n\n\n" - }; - final String[][] res = { - {"hello", ""}, - {""}, // Excel format does not ignore empty lines - {""} - }; + final String[] codes = { "hello,\r\n\r\n\r\n", "hello,\n\n\n", "hello,\"\"\r\n\r\n\r\n", "hello,\"\"\n\n\n" }; + final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines + { "" } }; for (final String code : codes) { final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL); final List records = parser.getRecords(); @@ -353,20 +301,11 @@ public class CSVParserTest { @Test public void testEndOfFileBehaviorCSV() throws Exception { - final String[] codes = { - "hello,\r\n\r\nworld,\r\n", - "hello,\r\n\r\nworld,", - "hello,\r\n\r\nworld,\"\"\r\n", - "hello,\r\n\r\nworld,\"\"", - "hello,\r\n\r\nworld,\n", - "hello,\r\n\r\nworld,", - "hello,\r\n\r\nworld,\"\"\n", - "hello,\r\n\r\nworld,\"\"" - }; - final String[][] res = { - {"hello", ""}, // CSV format ignores empty lines - {"world", ""} - }; + final String[] codes = { "hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", + "hello,\r\n\r\nworld,\"\"", "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", + "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\"" }; + final String[][] res = { { "hello", "" }, // CSV format ignores empty lines + { "world", "" } }; for (final 
String code : codes) { final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); final List records = parser.getRecords(); @@ -381,21 +320,11 @@ public class CSVParserTest { @Test public void testEndOfFileBehaviourExcel() throws Exception { - final String[] codes = { - "hello,\r\n\r\nworld,\r\n", - "hello,\r\n\r\nworld,", - "hello,\r\n\r\nworld,\"\"\r\n", - "hello,\r\n\r\nworld,\"\"", - "hello,\r\n\r\nworld,\n", - "hello,\r\n\r\nworld,", - "hello,\r\n\r\nworld,\"\"\n", - "hello,\r\n\r\nworld,\"\"" - }; - final String[][] res = { - {"hello", ""}, - {""}, // Excel format does not ignore empty lines - {"world", ""} - }; + final String[] codes = { "hello,\r\n\r\nworld,\r\n", "hello,\r\n\r\nworld,", "hello,\r\n\r\nworld,\"\"\r\n", + "hello,\r\n\r\nworld,\"\"", "hello,\r\n\r\nworld,\n", "hello,\r\n\r\nworld,", + "hello,\r\n\r\nworld,\"\"\n", "hello,\r\n\r\nworld,\"\"" }; + final String[][] res = { { "hello", "" }, { "" }, // Excel format does not ignore empty lines + { "world", "" } }; for (final String code : codes) { final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL); @@ -411,16 +340,10 @@ public class CSVParserTest { @Test public void testExcelFormat1() throws IOException { - final String code = - "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," - + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n"; - final String[][] res = { - {"value1", "value2", "value3", "value4"}, - {"a", "b", "c", "d"}, - {" x", "", "", ""}, - {""}, - {"\"hello\"", " \"world\"", "abc\ndef", ""} - }; + final String code = "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," + + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n"; + final String[][] res = { { "value1", "value2", "value3", "value4" }, { "a", "b", "c", "d" }, + { " x", "", "", "" }, { "" }, { "\"hello\"", " \"world\"", "abc\ndef", "" } }; final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL); final List records = parser.getRecords(); assertEquals(res.length, records.size()); 
@@ -434,13 +357,7 @@ public class CSVParserTest { @Test public void testExcelFormat2() throws Exception { final String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n"; - final String[][] res = { - {"foo", "baar"}, - {""}, - {"hello", ""}, - {""}, - {"world", ""} - }; + final String[][] res = { { "foo", "baar" }, { "" }, { "hello", "" }, { "" }, { "world", "" } }; final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL); final List records = parser.getRecords(); assertEquals(res.length, records.size()); @@ -451,6 +368,24 @@ public class CSVParserTest { parser.close(); } + /** + * Tests an exported Excel worksheet with a header row and rows that have more columns than the headers + */ + @Test + public void testExcelHeaderCountLessThanData() throws Exception { + final String code = "A,B,C,,\r\na,b,c,d,e\r\n"; + final CSVParser parser = CSVParser.parse(code, CSVFormat.EXCEL.withHeader()); + try { + for (CSVRecord record : parser.getRecords()) { + Assert.assertEquals("a", record.get("A")); + Assert.assertEquals("b", record.get("B")); + Assert.assertEquals("c", record.get("C")); + } + } finally { + parser.close(); + } + } + @Test public void testForEach() throws Exception { final List records = new ArrayList(); @@ -462,9 +397,9 @@ public class CSVParserTest { } assertEquals(3, records.size()); - assertArrayEquals(new String[]{"a", "b", "c"}, records.get(0).values()); - assertArrayEquals(new String[]{"1", "2", "3"}, records.get(1).values()); - assertArrayEquals(new String[]{"x", "y", "z"}, records.get(2).values()); + assertArrayEquals(new String[] { "a", "b", "c" }, records.get(0).values()); + assertArrayEquals(new String[] { "1", "2", "3" }, records.get(1).values()); + assertArrayEquals(new String[] { "x", "y", "z" }, records.get(2).values()); } @Test @@ -493,7 +428,7 @@ public class CSVParserTest { @Test(expected = IllegalArgumentException.class) public void testDuplicateHeaders() throws Exception { - CSVParser.parse("a,b,a\n1,2,3\nx,y,z", 
CSVFormat.DEFAULT.withHeader(new String[]{})); + CSVParser.parse("a,b,a\n1,2,3\nx,y,z", CSVFormat.DEFAULT.withHeader(new String[] {})); } @Test @@ -584,8 +519,8 @@ public class CSVParserTest { @Test public void testGetRecordWithMultiLineValues() throws Exception { - final CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + "\"c\r\n1\",\"c\r\n2\"", - CSVFormat.DEFAULT.withRecordSeparator(CRLF)); + final CSVParser parser = CSVParser.parse("\"a\r\n1\",\"a\r\n2\"" + CRLF + "\"b\r\n1\",\"b\r\n2\"" + CRLF + + "\"c\r\n1\",\"c\r\n2\"", CSVFormat.DEFAULT.withRecordSeparator(CRLF)); CSVRecord record; assertEquals(0, parser.getRecordNumber()); assertEquals(0, parser.getCurrentLineNumber()); @@ -640,7 +575,7 @@ public class CSVParserTest { assertFalse(records.hasNext()); } - @Test(expected=IllegalArgumentException.class) + @Test(expected = IllegalArgumentException.class) public void testHeadersMissingException() throws Exception { final Reader in = new StringReader("a,,c,,d\n1,2,3,4\nx,y,z,zz"); CSVFormat.DEFAULT.withHeader().parse(in).iterator(); @@ -678,8 +613,8 @@ public class CSVParserTest { @Test public void testIgnoreEmptyLines() throws IOException { final String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; - //String code = "world\r\n\n"; - //String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n"; + // String code = "world\r\n\n"; + // String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n"; final CSVParser parser = CSVParser.parse(code, CSVFormat.DEFAULT); final List records = parser.getRecords(); assertEquals(3, records.size()); @@ -705,12 +640,12 @@ public class CSVParserTest { } catch (final UnsupportedOperationException expected) { // expected } - assertArrayEquals(new String[]{"a", "b", "c"}, iterator.next().values()); - assertArrayEquals(new String[]{"1", "2", "3"}, iterator.next().values()); + assertArrayEquals(new String[] { "a", "b", "c" }, iterator.next().values()); + assertArrayEquals(new String[] { "1", 
"2", "3" }, iterator.next().values()); assertTrue(iterator.hasNext()); assertTrue(iterator.hasNext()); assertTrue(iterator.hasNext()); - assertArrayEquals(new String[]{"x", "y", "z"}, iterator.next().values()); + assertArrayEquals(new String[] { "x", "y", "z" }, iterator.next().values()); assertFalse(iterator.hasNext()); try { @@ -765,7 +700,8 @@ public class CSVParserTest { assertFalse(records.hasNext()); } - @Test // TODO this may lead to strange behavior, throw an exception if iterator() has already been called? + @Test + // TODO this may lead to strange behavior, throw an exception if iterator() has already been called? public void testMultipleIterators() throws Exception { final CSVParser parser = CSVParser.parse("a,b,c" + CR + "d,e,f", CSVFormat.DEFAULT); @@ -914,7 +850,8 @@ public class CSVParserTest { } private void validateLineNumbers(final String lineSeparator) throws IOException { - final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator)); + final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", + CSVFormat.DEFAULT.withRecordSeparator(lineSeparator)); assertEquals(0, parser.getCurrentLineNumber()); assertNotNull(parser.nextRecord()); assertEquals(1, parser.getCurrentLineNumber()); @@ -930,7 +867,8 @@ public class CSVParserTest { } private void validateRecordNumbers(final String lineSeparator) throws IOException { - final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator)); + final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", + CSVFormat.DEFAULT.withRecordSeparator(lineSeparator)); CSVRecord record; assertEquals(0, parser.getRecordNumber()); assertNotNull(record = parser.nextRecord());