diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java index 1067dd26..89e63e3a 100644 --- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java @@ -122,19 +122,6 @@ final class ExtendedBufferedReader extends BufferedReader { return c; } - /** - * Returns the next n characters in the current reader without consuming them. The next call to {@link #read()} will still return the next value. This - * doesn't affect line number or last character. - * - * @param n the number characters look ahead. - * @return the next n characters. - * @throws IOException If an I/O error occurs - */ - char[] lookAhead(final int n) throws IOException { - final char[] buf = new char[n]; - return lookAhead(buf); - } - /** * Populates the buffer with the next {@code buf.length} characters in the * current reader without consuming them. The next call to {@link #read()} will @@ -154,6 +141,19 @@ final class ExtendedBufferedReader extends BufferedReader { return buf; } + /** + * Returns the next n characters in the current reader without consuming them. The next call to {@link #read()} will still return the next value. This + * doesn't affect line number or last character. + * + * @param n the number characters look ahead. + * @return the next n characters. + * @throws IOException If an I/O error occurs + */ + char[] lookAhead(final int n) throws IOException { + final char[] buf = new char[n]; + return lookAhead(buf); + } + @Override public int read() throws IOException { final int current = super.read(); diff --git a/src/test/java/org/apache/commons/csv/CSVFileParserTest.java b/src/test/java/org/apache/commons/csv/CSVFileParserTest.java index 96ac8749..4d9b8711 100644 --- a/src/test/java/org/apache/commons/csv/CSVFileParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFileParserTest.java @@ -42,6 +42,11 @@ public class CSVFileParserTest { private static final File BASE_DIR = new File("src/test/resources/org/apache/commons/csv/CSVFileParser"); + public static Stream generateData() { + final File[] files = BASE_DIR.listFiles((dir, name) -> name.startsWith("test") && name.endsWith(".txt")); + return files != null ? Stream.of(files) : Stream.empty(); + } + private String readTestData(final BufferedReader reader) throws IOException { String line; do { @@ -50,11 +55,6 @@ public class CSVFileParserTest { return line; } - public static Stream generateData() { - final File[] files = BASE_DIR.listFiles((dir, name) -> name.startsWith("test") && name.endsWith(".txt")); - return files != null ? Stream.of(files) : Stream.empty(); - } - @ParameterizedTest @MethodSource("generateData") public void testCSVFile(final File testFile) throws Exception { diff --git a/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java b/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java index a3e6ecd0..d62f41f3 100644 --- a/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFormatPredefinedTest.java @@ -41,6 +41,16 @@ public class CSVFormatPredefinedTest { test(CSVFormat.EXCEL, "Excel"); } + @Test + public void testMongoDbCsv() { + test(CSVFormat.MONGODB_CSV, "MongoDBCsv"); + } + + @Test + public void testMongoDbTsv() { + test(CSVFormat.MONGODB_TSV, "MongoDBTsv"); + } + @Test public void testMySQL() { test(CSVFormat.MYSQL, "MySQL"); @@ -56,16 +66,6 @@ public class CSVFormatPredefinedTest { test(CSVFormat.POSTGRESQL_CSV, "PostgreSQLCsv"); } - @Test - public void testMongoDbCsv() { - test(CSVFormat.MONGODB_CSV, "MongoDBCsv"); - } - - @Test - public void testMongoDbTsv() { - test(CSVFormat.MONGODB_TSV, "MongoDBTsv"); - } - @Test public void testPostgreSqlText() { test(CSVFormat.POSTGRESQL_TEXT, "PostgreSQLText"); diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java index a7f3bcd2..39b02ba2 100644 --- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java +++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java @@ -264,6 +264,15 @@ public class CSVRecordTest { } } + @Test + public void testStream() { + final AtomicInteger i = new AtomicInteger(); + record.stream().forEach(value -> { + assertEquals(values[i.get()], value); + i.incrementAndGet(); + }); + } + @Test public void testToList() { int i = 0; @@ -297,15 +306,6 @@ public class CSVRecordTest { } } - @Test - public void testStream() { - final AtomicInteger i = new AtomicInteger(); - record.stream().forEach(value -> { - assertEquals(values[i.get()], value); - i.incrementAndGet(); - }); - } - @Test public void testToString() { assertNotNull(recordWithHeader.toString()); diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java index b2897739..cc8d728a 100644 --- a/src/test/java/org/apache/commons/csv/LexerTest.java +++ b/src/test/java/org/apache/commons/csv/LexerTest.java @@ -47,63 +47,65 @@ public class LexerTest { private CSVFormat formatWithEscaping; - @BeforeEach - public void setUp() { - formatWithEscaping = CSVFormat.DEFAULT.withEscape('\\'); - } - @SuppressWarnings("resource") private Lexer createLexer(final String input, final CSVFormat format) { return new Lexer(format, new ExtendedBufferedReader(new StringReader(input))); } - @Test - public void testSurroundingSpacesAreDeleted() throws IOException { - final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,"; - try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingSpaces")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(EOF, "")); - } + @BeforeEach + public void setUp() { + formatWithEscaping = CSVFormat.DEFAULT.withEscape('\\'); } + // simple token with escaping enabled @Test - public void testSurroundingTabsAreDeleted() throws IOException { - final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,"; - try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingTabs")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(EOF, "")); - } - } - - @Test - public void testIgnoreEmptyLines() throws IOException { - final String code = "first,line,\n" + "\n" + "\n" + "second,line\n" + "\n" + "\n" + "third line \n" + "\n" + - "\n" + "last, line \n" + "\n" + "\n" + "\n"; - final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines(); + public void testBackslashWithEscaping() throws IOException { + /* + * file: a,\,,b \,, + */ + final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne"; + final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false); + assertTrue(format.isEscapeCharacterSet()); try (final Lexer parser = createLexer(code, format)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "first")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "second")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "line")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "third line ")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "last")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, " line ")); - assertThat(parser.nextToken(new Token()), matches(EOF, "")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, ",")); + assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, ",")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "\nc")); + assertThat(parser.nextToken(new Token()), matches(EORECORD, "d\r")); + assertThat(parser.nextToken(new Token()), matches(EOF, "e")); + } + } + + // simple token with escaping not enabled + @Test + public void testBackslashWithoutEscaping() throws IOException { + /* + * file: a,\,,b \,, + */ + final String code = "a,\\,,b\\\n\\,,"; + final CSVFormat format = CSVFormat.DEFAULT; + assertFalse(format.isEscapeCharacterSet()); + try (final Lexer parser = createLexer(code, format)) { + assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); + // an unquoted single backslash is not an escape char + assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); + assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\")); + // an unquoted single backslash is not an escape char + assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); assertThat(parser.nextToken(new Token()), matches(EOF, "")); } } + @Test + public void testBackspace() throws Exception { + try (final Lexer lexer = createLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped")); + } + } + @Test public void testComments() throws IOException { final String code = "first,line,\n" + "second,line,tokenWith#no-comment\n" + "# comment line \n" + @@ -173,45 +175,141 @@ public class LexerTest { } } - // simple token with escaping not enabled @Test - public void testBackslashWithoutEscaping() throws IOException { - /* - * file: a,\,,b \,, - */ - final String code = "a,\\,,b\\\n\\,,"; - final CSVFormat format = CSVFormat.DEFAULT; - assertFalse(format.isEscapeCharacterSet()); + public void testCR() throws Exception { + try (final Lexer lexer = createLexer("character" + CR + "NotEscaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character")); + assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped")); + } + } + + // From CSV-1 + @Test + public void testDelimiterIsWhitespace() throws IOException { + final String code = "one\ttwo\t\tfour \t five\t six"; + try (final Lexer parser = createLexer(code, CSVFormat.TDF)) { + assertThat(parser.nextToken(new Token()), matches(TOKEN, "one")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "two")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "four")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "five")); + assertThat(parser.nextToken(new Token()), matches(EOF, "six")); + } + } + + @Test // TODO is this correct? Do we expect BACKSPACE to be unescaped? + public void testEscapedBackspace() throws Exception { + try (final Lexer lexer = createLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped")); + } + } + + @Test + public void testEscapedCharacter() throws Exception { + try (final Lexer lexer = createLexer("character\\aEscaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped")); + } + } + + @Test + public void testEscapedControlCharacter() throws Exception { + // we are explicitly using an escape different from \ here + try (final Lexer lexer = createLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'))) { + assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); + } + } + + @Test + public void testEscapedControlCharacter2() throws Exception { + try (final Lexer lexer = createLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'))) { + assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); + } + } + + @Test + public void testEscapedCR() throws Exception { + try (final Lexer lexer = createLexer("character\\" + CR + "Escaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); + } + } + + @Test // TODO is this correct? Do we expect FF to be unescaped? + public void testEscapedFF() throws Exception { + try (final Lexer lexer = createLexer("character\\" + FF + "Escaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped")); + } + } + + @Test + public void testEscapedLF() throws Exception { + try (final Lexer lexer = createLexer("character\\" + LF + "Escaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped")); + } + } + + @Test + public void testEscapedMySqlNullValue() throws Exception { + // MySQL uses \N to symbolize null values. We have to restore this + try (final Lexer lexer = createLexer("character\\NEscaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped")); + } + } + + @Test // TODO is this correct? Do we expect TAB to be unescaped? + public void testEscapedTab() throws Exception { + try (final Lexer lexer = createLexer("character\\" + TAB + "Escaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped")); + } + + } + + @Test + public void testEscapingAtEOF() throws Exception { + final String code = "escaping at EOF is evil\\"; + try (final Lexer lexer = createLexer(code, formatWithEscaping)) { + assertThrows(IOException.class, () -> lexer.nextToken(new Token())); + } + } + + @Test + public void testFF() throws Exception { + try (final Lexer lexer = createLexer("character" + FF + "NotEscaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped")); + } + } + + @Test + public void testIgnoreEmptyLines() throws IOException { + final String code = "first,line,\n" + "\n" + "\n" + "second,line\n" + "\n" + "\n" + "third line \n" + "\n" + + "\n" + "last, line \n" + "\n" + "\n" + "\n"; + final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines(); try (final Lexer parser = createLexer(code, format)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); - // an unquoted single backslash is not an escape char - assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\")); - // an unquoted single backslash is not an escape char - assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "first")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); + assertThat(parser.nextToken(new Token()), matches(EORECORD, "")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "second")); + assertThat(parser.nextToken(new Token()), matches(EORECORD, "line")); + assertThat(parser.nextToken(new Token()), matches(EORECORD, "third line ")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "last")); + assertThat(parser.nextToken(new Token()), matches(EORECORD, " line ")); + assertThat(parser.nextToken(new Token()), matches(EOF, "")); assertThat(parser.nextToken(new Token()), matches(EOF, "")); } } - // simple token with escaping enabled @Test - public void testBackslashWithEscaping() throws IOException { - /* - * file: a,\,,b \,, - */ - final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne"; - final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false); - assertTrue(format.isEscapeCharacterSet()); - try (final Lexer parser = createLexer(code, format)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, ",")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, ",")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "\nc")); - assertThat(parser.nextToken(new Token()), matches(EORECORD, "d\r")); - assertThat(parser.nextToken(new Token()), matches(EOF, "e")); + public void testIsMetaCharCommentStart() throws IOException { + try (final Lexer lexer = createLexer("#", CSVFormat.DEFAULT.withCommentMarker('#'))) { + final int ch = lexer.readEscape(); + assertEquals('#', ch); + } + } + + @Test + public void testLF() throws Exception { + try (final Lexer lexer = createLexer("character" + LF + "NotEscaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character")); + assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped")); } } @@ -266,148 +364,6 @@ public class LexerTest { } } - // From CSV-1 - @Test - public void testDelimiterIsWhitespace() throws IOException { - final String code = "one\ttwo\t\tfour \t five\t six"; - try (final Lexer parser = createLexer(code, CSVFormat.TDF)) { - assertThat(parser.nextToken(new Token()), matches(TOKEN, "one")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "two")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "four")); - assertThat(parser.nextToken(new Token()), matches(TOKEN, "five")); - assertThat(parser.nextToken(new Token()), matches(EOF, "six")); - } - } - - @Test - public void testEscapedCR() throws Exception { - try (final Lexer lexer = createLexer("character\\" + CR + "Escaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); - } - } - - @Test - public void testCR() throws Exception { - try (final Lexer lexer = createLexer("character" + CR + "NotEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character")); - assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped")); - } - } - - @Test - public void testEscapedLF() throws Exception { - try (final Lexer lexer = createLexer("character\\" + LF + "Escaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped")); - } - } - - @Test - public void testLF() throws Exception { - try (final Lexer lexer = createLexer("character" + LF + "NotEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character")); - assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped")); - } - } - - @Test // TODO is this correct? Do we expect TAB to be unescaped? - public void testEscapedTab() throws Exception { - try (final Lexer lexer = createLexer("character\\" + TAB + "Escaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped")); - } - - } - - @Test - public void testTab() throws Exception { - try (final Lexer lexer = createLexer("character" + TAB + "NotEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped")); - } - } - - @Test // TODO is this correct? Do we expect BACKSPACE to be unescaped? - public void testEscapedBackspace() throws Exception { - try (final Lexer lexer = createLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped")); - } - } - - @Test - public void testBackspace() throws Exception { - try (final Lexer lexer = createLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped")); - } - } - - @Test // TODO is this correct? Do we expect FF to be unescaped? - public void testEscapedFF() throws Exception { - try (final Lexer lexer = createLexer("character\\" + FF + "Escaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped")); - } - } - - @Test - public void testFF() throws Exception { - try (final Lexer lexer = createLexer("character" + FF + "NotEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped")); - } - } - - @Test - public void testEscapedMySqlNullValue() throws Exception { - // MySQL uses \N to symbolize null values. We have to restore this - try (final Lexer lexer = createLexer("character\\NEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped")); - } - } - - @Test - public void testEscapedCharacter() throws Exception { - try (final Lexer lexer = createLexer("character\\aEscaped", formatWithEscaping)) { - assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped")); - } - } - - @Test - public void testEscapedControlCharacter() throws Exception { - // we are explicitly using an escape different from \ here - try (final Lexer lexer = createLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'))) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); - } - } - - @Test - public void testEscapedControlCharacter2() throws Exception { - try (final Lexer lexer = createLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'))) { - assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); - } - } - - @Test - public void testEscapingAtEOF() throws Exception { - final String code = "escaping at EOF is evil\\"; - try (final Lexer lexer = createLexer(code, formatWithEscaping)) { - assertThrows(IOException.class, () -> lexer.nextToken(new Token())); - } - } - - @Test - public void testTrimTrailingSpacesZeroLength() throws Exception { - final StringBuilder buffer = new StringBuilder(""); - final Lexer lexer = createLexer(buffer.toString(), CSVFormat.DEFAULT); - lexer.trimTrailingSpaces(buffer); - assertThat(lexer.nextToken(new Token()), matches(EOF, "")); - } - - @Test - public void testReadEscapeTab() throws IOException { - try (final Lexer lexer = createLexer("t", CSVFormat.DEFAULT.withEscape('\t'))) { - final int ch = lexer.readEscape(); - assertThat(lexer.nextToken(new Token()), matches(EOF, "")); - assertEquals(TAB, ch); - } - } - @Test public void testReadEscapeBackspace() throws IOException { try (final Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) { @@ -425,10 +381,54 @@ public class LexerTest { } @Test - public void testIsMetaCharCommentStart() throws IOException { - try (final Lexer lexer = createLexer("#", CSVFormat.DEFAULT.withCommentMarker('#'))) { + public void testReadEscapeTab() throws IOException { + try (final Lexer lexer = createLexer("t", CSVFormat.DEFAULT.withEscape('\t'))) { final int ch = lexer.readEscape(); - assertEquals('#', ch); + assertThat(lexer.nextToken(new Token()), matches(EOF, "")); + assertEquals(TAB, ch); } } + + @Test + public void testSurroundingSpacesAreDeleted() throws IOException { + final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,"; + try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { + assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingSpaces")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); + assertThat(parser.nextToken(new Token()), matches(EOF, "")); + } + } + + @Test + public void testSurroundingTabsAreDeleted() throws IOException { + final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,"; + try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { + assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingTabs")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); + assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); + assertThat(parser.nextToken(new Token()), matches(EOF, "")); + } + } + + @Test + public void testTab() throws Exception { + try (final Lexer lexer = createLexer("character" + TAB + "NotEscaped", formatWithEscaping)) { + assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped")); + } + } + + @Test + public void testTrimTrailingSpacesZeroLength() throws Exception { + final StringBuilder buffer = new StringBuilder(""); + final Lexer lexer = createLexer(buffer.toString(), CSVFormat.DEFAULT); + lexer.trimTrailingSpaces(buffer); + assertThat(lexer.nextToken(new Token()), matches(EOF, "")); + } } diff --git a/src/test/java/org/apache/commons/csv/PerformanceTest.java b/src/test/java/org/apache/commons/csv/PerformanceTest.java index 84ed0837..ea6e8fd1 100644 --- a/src/test/java/org/apache/commons/csv/PerformanceTest.java +++ b/src/test/java/org/apache/commons/csv/PerformanceTest.java @@ -41,6 +41,21 @@ import org.apache.commons.io.IOUtils; @SuppressWarnings("boxing") public class PerformanceTest { + @FunctionalInterface + private interface CSVParserFactory { + CSVParser createParser() throws IOException; + } + + // Container for basic statistics + private static class Stats { + final int count; + final int fields; + Stats(final int c, final int f) { + count = c; + fields = f; + } + } + private static final String[] PROPS = { "java.version", // Java Runtime Environment version "java.vendor", // Java Runtime Environment vendor @@ -58,17 +73,42 @@ public class PerformanceTest { "os.arch", // Operating system architecture "os.version", // Operating system version }; - private static int max = 11; // skip first test private static int num; // number of elapsed times recorded - private static final long[] ELAPSED_TIMES = new long[max]; + private static final long[] ELAPSED_TIMES = new long[max]; private static final CSVFormat format = CSVFormat.EXCEL; private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz"; + private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt"); + private static Reader createReader() throws IOException { + return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1); + } + + private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input) + throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception { + return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input); + } + + private static Constructor getLexerCtor(final String clazz) throws Exception { + @SuppressWarnings("unchecked") + final Class lexer = (Class) Class.forName("org.apache.commons.csv." + clazz); + return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class); + } + + private static Stats iterate(final Iterable it) { + int count = 0; + int fields = 0; + for (final CSVRecord record : it) { + count++; + fields += record.size(); + } + return new Stats(count, fields); + } + public static void main(final String [] args) throws Exception { if (BIG_FILE.exists()) { System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length()); @@ -128,26 +168,15 @@ public class PerformanceTest { } } - private static Reader createReader() throws IOException { - return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1); - } - - // Container for basic statistics - private static class Stats { - final int count; - final int fields; - Stats(final int c, final int f) { - count = c; - fields = f; + private static Stats readAll(final BufferedReader in, final boolean split) throws IOException { + int count = 0; + int fields = 0; + String record; + while ((record = in.readLine()) != null) { + count++; + fields += split ? record.split(",").length : 1; } - } - - // Display end stats; store elapsed for average - private static void show(final String msg, final Stats s, final long start) { - final long elapsed = System.currentTimeMillis() - start; - System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields); - ELAPSED_TIMES[num] = elapsed; - num++; + return new Stats(count, fields); } // calculate and show average @@ -162,106 +191,12 @@ public class PerformanceTest { num = 0; // ready for next set } - private static void testReadBigFile(final boolean split) throws Exception { - for (int i = 0; i < max; i++) { - final long startMillis; - final Stats stats; - try (final BufferedReader in = new BufferedReader(createReader())) { - startMillis = System.currentTimeMillis(); - stats = readAll(in, split); - } - show(split ? "file+split" : "file", stats, startMillis); - } - show(); - } - - private static Stats readAll(final BufferedReader in, final boolean split) throws IOException { - int count = 0; - int fields = 0; - String record; - while ((record = in.readLine()) != null) { - count++; - fields += split ? record.split(",").length : 1; - } - return new Stats(count, fields); - } - - private static void testExtendedBuffer(final boolean makeString) throws Exception { - for (int i = 0; i < max; i++) { - int fields = 0; - int lines = 0; - final long startMillis; - try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) { - startMillis = System.currentTimeMillis(); - int read; - if (makeString) { - StringBuilder sb = new StringBuilder(); - while ((read = in.read()) != -1) { - sb.append((char) read); - if (read == ',') { // count delimiters - sb.toString(); - sb = new StringBuilder(); - fields++; - } else if (read == '\n') { - sb.toString(); - sb = new StringBuilder(); - lines++; - } - } - } else { - while ((read = in.read()) != -1) { - if (read == ',') { // count delimiters - fields++; - } else if (read == '\n') { - lines++; - } - } - } - fields += lines; // EOL is a delimiter too - } - show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis); - } - show(); - } - - private static void testParser(final String msg, final CSVParserFactory fac) throws Exception { - for (int i = 0; i < max; i++) { - final long startMillis; - final Stats stats; - try (final CSVParser parser = fac.createParser()) { - startMillis = System.currentTimeMillis(); - stats = iterate(parser); - } - show(msg, stats, startMillis); - } - show(); - } - - @FunctionalInterface - private interface CSVParserFactory { - CSVParser createParser() throws IOException; - } - - private static void testParseCommonsCSV() throws Exception { - testParser("CSV", () -> new CSVParser(createReader(), format)); - } - - private static void testParsePath() throws Exception { - testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format)); - } - - private static void testParsePathDoubleBuffering() throws Exception { - testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format)); - } - - private static void testParseURL() throws Exception { - testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format)); - } - - private static Constructor getLexerCtor(final String clazz) throws Exception { - @SuppressWarnings("unchecked") - final Class lexer = (Class) Class.forName("org.apache.commons.csv." + clazz); - return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class); + // Display end stats; store elapsed for average + private static void show(final String msg, final Stats s, final long start) { + final long elapsed = System.currentTimeMillis() - start; + System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields); + ELAPSED_TIMES[num] = elapsed; + num++; } private static void testCSVLexer(final boolean newToken, final String test) throws Exception { @@ -312,19 +247,84 @@ public class PerformanceTest { show(); } - private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input) - throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception { - return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input); + private static void testExtendedBuffer(final boolean makeString) throws Exception { + for (int i = 0; i < max; i++) { + int fields = 0; + int lines = 0; + final long startMillis; + try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) { + startMillis = System.currentTimeMillis(); + int read; + if (makeString) { + StringBuilder sb = new StringBuilder(); + while ((read = in.read()) != -1) { + sb.append((char) read); + if (read == ',') { // count delimiters + sb.toString(); + sb = new StringBuilder(); + fields++; + } else if (read == '\n') { + sb.toString(); + sb = new StringBuilder(); + lines++; + } + } + } else { + while ((read = in.read()) != -1) { + if (read == ',') { // count delimiters + fields++; + } else if (read == '\n') { + lines++; + } + } + } + fields += lines; // EOL is a delimiter too + } + show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis); + } + show(); } - private static Stats iterate(final Iterable it) { - int count = 0; - int fields = 0; - for (final CSVRecord record : it) { - count++; - fields += record.size(); + private static void testParseCommonsCSV() throws Exception { + testParser("CSV", () -> new CSVParser(createReader(), format)); + } + + private static void testParsePath() throws Exception { + testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format)); + } + + private static void testParsePathDoubleBuffering() throws Exception { + testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format)); + } + + private static void testParser(final String msg, final CSVParserFactory fac) throws Exception { + for (int i = 0; i < max; i++) { + final long startMillis; + final Stats stats; + try (final CSVParser parser = fac.createParser()) { + startMillis = System.currentTimeMillis(); + stats = iterate(parser); + } + show(msg, stats, startMillis); } - return new Stats(count, fields); + show(); + } + + private static void testParseURL() throws Exception { + testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format)); + } + + private static void testReadBigFile(final boolean split) throws Exception { + for (int i = 0; i < max; i++) { + final long startMillis; + final Stats stats; + try (final BufferedReader in = new BufferedReader(createReader())) { + startMillis = System.currentTimeMillis(); + stats = readAll(in, split); + } + show(split ? "file+split" : "file", stats, startMillis); + } + show(); } } \ No newline at end of file diff --git a/src/test/java/org/apache/commons/csv/TokenMatchers.java b/src/test/java/org/apache/commons/csv/TokenMatchers.java index cfe522e8..c081e7ee 100644 --- a/src/test/java/org/apache/commons/csv/TokenMatchers.java +++ b/src/test/java/org/apache/commons/csv/TokenMatchers.java @@ -27,25 +27,6 @@ import org.hamcrest.TypeSafeDiagnosingMatcher; */ final class TokenMatchers { - public static Matcher hasType(final Token.Type expectedType) { - return new TypeSafeDiagnosingMatcher() { - - @Override - public void describeTo(final Description description) { - description.appendText("token has type "); - description.appendValue(expectedType); - } - - @Override - protected boolean matchesSafely(final Token item, - final Description mismatchDescription) { - mismatchDescription.appendText("token type is "); - mismatchDescription.appendValue(item.type); - return item.type == expectedType; - } - }; - } - public static Matcher hasContent(final String expectedContent) { return new TypeSafeDiagnosingMatcher() { @@ -65,6 +46,25 @@ final class TokenMatchers { }; } + public static Matcher hasType(final Token.Type expectedType) { + return new TypeSafeDiagnosingMatcher() { + + @Override + public void describeTo(final Description description) { + description.appendText("token has type "); + description.appendValue(expectedType); + } + + @Override + protected boolean matchesSafely(final Token item, + final Description mismatchDescription) { + mismatchDescription.appendText("token type is "); + mismatchDescription.appendValue(item.type); + return item.type == expectedType; + } + }; + } + public static Matcher isReady() { return new TypeSafeDiagnosingMatcher() { diff --git a/src/test/java/org/apache/commons/csv/TokenMatchersTest.java b/src/test/java/org/apache/commons/csv/TokenMatchersTest.java index e8a115dc..47c213d7 100644 --- a/src/test/java/org/apache/commons/csv/TokenMatchersTest.java +++ b/src/test/java/org/apache/commons/csv/TokenMatchersTest.java @@ -39,6 +39,12 @@ public class TokenMatchersTest { token.content.append("content"); } + @Test + public void testHasContent() { + assertFalse(hasContent("This is not the token's content").matches(token)); + assertTrue(hasContent("content").matches(token)); + } + @Test public void testHasType() { assertFalse(hasType(Token.Type.COMMENT).matches(token)); @@ -47,12 +53,6 @@ public class TokenMatchersTest { assertTrue(hasType(Token.Type.TOKEN).matches(token)); } - @Test - public void testHasContent() { - assertFalse(hasContent("This is not the token's content").matches(token)); - assertTrue(hasContent("content").matches(token)); - } - @Test public void testIsReady() { assertTrue(isReady().matches(token)); diff --git a/src/test/java/org/apache/commons/csv/Utils.java b/src/test/java/org/apache/commons/csv/Utils.java index 289083e7..fcdbc3d1 100644 --- a/src/test/java/org/apache/commons/csv/Utils.java +++ b/src/test/java/org/apache/commons/csv/Utils.java @@ -28,9 +28,6 @@ import java.util.List; */ final class Utils { - private Utils() { - } - /** * Checks if the 2d array has the same contents as the list of records. * @@ -45,4 +42,7 @@ final class Utils { assertArrayEquals(expected[i], actual.get(i).values(), message + " (entry " + i + ")"); } } + + private Utils() { + } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java index e8da0643..fca6bec2 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv148Test.java @@ -24,6 +24,23 @@ import org.junit.jupiter.api.Test; public class JiraCsv148Test { + @Test + public void testWithIgnoreSurroundingSpacesEmpty() { + // @formatter:off + final CSVFormat format = CSVFormat.DEFAULT.builder() + .setQuoteMode(QuoteMode.ALL) + .setIgnoreSurroundingSpaces(true) + .build(); + // @formatter:on + assertEquals( + "\"\",\" \",\" Single space on the left\",\"Single space on the right \"," + + "\" Single spaces on both sides \",\" Multiple spaces on the left\"," + + "\"Multiple spaces on the right \",\" Multiple spaces on both sides \"", + format.format("", " ", " Single space on the left", "Single space on the right ", + " Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ", + " Multiple spaces on both sides ")); + } + /** * The difference between withTrim()and withIgnoreSurroundingSpace(): difference: withTrim() can remove the leading * and trailing spaces and newlines in quotation marks, while withIgnoreSurroundingSpace() cannot The same point: @@ -45,21 +62,4 @@ public class JiraCsv148Test { " Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ", " Multiple spaces on both sides ")); } - - @Test - public void testWithIgnoreSurroundingSpacesEmpty() { - // @formatter:off - final CSVFormat format = CSVFormat.DEFAULT.builder() - .setQuoteMode(QuoteMode.ALL) - .setIgnoreSurroundingSpaces(true) - .build(); - // @formatter:on - assertEquals( - "\"\",\" \",\" Single space on the left\",\"Single space on the right \"," - + "\" Single spaces on both sides \",\" Multiple spaces on the left\"," - + "\"Multiple spaces on the right \",\" Multiple spaces on both sides \"", - format.format("", " ", " Single space on the left", "Single space on the right ", - " Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ", - " Multiple spaces on both sides ")); - } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java index 7f608327..c7effb6e 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java @@ -31,6 +31,11 @@ import org.junit.jupiter.api.Test; public class JiraCsv167Test { + private Reader getTestReader() { + return new InputStreamReader( + ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/csv-167/sample1.csv")); + } + @Test public void parse() throws IOException { int totcomment = 0; @@ -81,9 +86,4 @@ public class JiraCsv167Test { assertEquals(totcomment, comments); assertEquals(totrecs, records); // records includes the header } - - private Reader getTestReader() { - return new InputStreamReader( - ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/csv-167/sample1.csv")); - } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java index ceb41328..17c62351 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv203Test.java @@ -61,21 +61,6 @@ public class JiraCsv203Test { assertEquals("N/A,\"Hello\",N/A,\"World\"\r\n", buffer.toString()); } - @Test - public void testWithoutQuoteMode() throws Exception { - // @formatter:off - final CSVFormat format = CSVFormat.EXCEL.builder() - .setNullString("N/A") - .setIgnoreSurroundingSpaces(true) - .build(); - // @formatter:on - final StringBuilder buffer = new StringBuilder(); - try (final CSVPrinter printer = new CSVPrinter(buffer, format)) { - printer.printRecord(null, "Hello", null, "World"); - } - assertEquals("N/A,Hello,N/A,World\r\n", buffer.toString()); - } - @Test public void testQuoteModeMinimal() throws Exception { // @formatter:off @@ -108,6 +93,23 @@ public class JiraCsv203Test { assertEquals("N/A,\"Hello\",N/A,\"World\"\r\n", buffer.toString()); } + @Test + public void testWithEmptyValues() throws Exception { + // @formatter:off + final CSVFormat format = CSVFormat.EXCEL.builder() + .setNullString("N/A") + .setIgnoreSurroundingSpaces(true) + .setQuoteMode(QuoteMode.ALL) + .build(); + // @formatter:on + final StringBuilder buffer = new StringBuilder(); + try (final CSVPrinter printer = new CSVPrinter(buffer, format)) { + printer.printRecord("", "Hello", "", "World"); + // printer.printRecord(new Object[] { null, "Hello", null, "World" }); + } + assertEquals("\"\",\"Hello\",\"\",\"World\"\r\n", buffer.toString()); + } + @Test public void testWithoutNullString() throws Exception { // @formatter:off @@ -125,19 +127,17 @@ public class JiraCsv203Test { } @Test - public void testWithEmptyValues() throws Exception { + public void testWithoutQuoteMode() throws Exception { // @formatter:off final CSVFormat format = CSVFormat.EXCEL.builder() .setNullString("N/A") .setIgnoreSurroundingSpaces(true) - .setQuoteMode(QuoteMode.ALL) .build(); // @formatter:on final StringBuilder buffer = new StringBuilder(); try (final CSVPrinter printer = new CSVPrinter(buffer, format)) { - printer.printRecord("", "Hello", "", "World"); - // printer.printRecord(new Object[] { null, "Hello", null, "World" }); + printer.printRecord(null, "Hello", null, "World"); } - assertEquals("\"\",\"Hello\",\"\",\"World\"\r\n", buffer.toString()); + assertEquals("N/A,Hello,N/A,World\r\n", buffer.toString()); } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java index 13d72390..84d274ac 100644 --- a/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java @@ -30,6 +30,10 @@ import org.apache.commons.csv.CSVRecord; import org.junit.jupiter.api.Test; public class JiraCsv248Test { + private static InputStream getTestInput() { + return ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/CSV-248/csvRecord.bin"); + } + /** * Test deserialisation of a CSVRecord created using version 1.6. * @@ -74,8 +78,4 @@ public class JiraCsv248Test { } } } - - private static InputStream getTestInput() { - return ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/CSV-248/csvRecord.bin"); - } } diff --git a/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java b/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java index 0579dd12..6a049881 100644 --- a/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java +++ b/src/test/java/org/apache/commons/csv/perf/PerformanceTest.java @@ -43,11 +43,9 @@ import org.junit.jupiter.api.Test; @SuppressWarnings("boxing") // test code public class PerformanceTest { - private final int max = 10; - private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz"; - private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt"); + private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt"); @BeforeAll public static void setUpClass() throws FileNotFoundException, IOException { if (BIG_FILE.exists()) { @@ -64,6 +62,8 @@ public class PerformanceTest { } } + private final int max = 10; + private BufferedReader createBufferedReader() throws IOException { return new BufferedReader(new FileReader(BIG_FILE)); }