From 8f98cf634effa7a1b7600d45c58303a77610521f Mon Sep 17 00:00:00 2001 From: Igor Kamyshnikov Date: Fri, 5 Apr 2024 17:59:01 +0100 Subject: [PATCH 1/2] MongoDBCsv empty first column parsing fix --- .../java/org/apache/commons/csv/Lexer.java | 24 +++++++-------- .../org/apache/commons/csv/CSVParserTest.java | 29 +++++++++++++++++++ 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index 17113227..80bde6c7 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -334,18 +334,7 @@ final class Lexer implements Closeable { while (true) { c = reader.read(); - if (isEscape(c)) { - if (isEscapeDelimiter()) { - token.content.append(delimiter); - } else { - final int unescaped = readEscape(); - if (unescaped == EOF) { // unexpected char after escape - token.content.append((char) c).append((char) reader.getLastChar()); - } else { - token.content.append((char) unescaped); - } - } - } else if (isQuoteChar(c)) { + if (isQuoteChar(c)) { if (isQuoteChar(reader.lookAhead())) { // double or escaped encapsulator -> add single encapsulator to token c = reader.read(); @@ -376,6 +365,17 @@ final class Lexer implements Closeable { } } } + } else if (isEscape(c)) { + if (isEscapeDelimiter()) { + token.content.append(delimiter); + } else { + final int unescaped = readEscape(); + if (unescaped == EOF) { // unexpected char after escape + token.content.append((char) c).append((char) reader.getLastChar()); + } else { + token.content.append((char) unescaped); + } + } } else if (isEndOfFile(c)) { if (lenientEof) { token.type = Token.Type.EOF; diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index b3a51378..c0f497f7 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -1562,6 +1562,35 @@ public class CSVParserTest { assertEquals(3, record.size()); }} + @Test + public void testParsingPrintedEmptyFirstColumn() throws Exception { + Exception firstException = null; + for (CSVFormat.Predefined format : CSVFormat.Predefined.values()) { + try { + StringWriter buf = new StringWriter(); + try (CSVPrinter printer = new CSVPrinter(buf, format.getFormat())) { + printer.printRecord("a", "b"); // header + printer.printRecord("", "x"); // empty first column + } + try (CSVParser csvRecords = new CSVParser(new StringReader(buf.toString()), format.getFormat().builder().setHeader().build())) { + for (CSVRecord csvRecord : csvRecords) { + assertNotNull(csvRecord); + } + } + } catch (Exception | Error e) { + Exception detailedException = new RuntimeException("format: " + format, e); + if (firstException == null) { + firstException = detailedException; + } else { + firstException.addSuppressed(detailedException); + } + } + } + + if (firstException != null) + throw firstException; + } + private void validateLineNumbers(final String lineSeparator) throws IOException { try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) { assertEquals(0, parser.getCurrentLineNumber()); From df7c340e7be5f662e641e911f1d5c76685abeaba Mon Sep 17 00:00:00 2001 From: Igor Kamyshnikov Date: Fri, 5 Apr 2024 20:27:43 +0100 Subject: [PATCH 2/2] better test - comparing printer input and parser output --- .../org/apache/commons/csv/CSVParserTest.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index c0f497f7..fd9de361 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -1564,19 +1564,25 @@ public class CSVParserTest { @Test public void testParsingPrintedEmptyFirstColumn() throws Exception { + String[][] lines = new String[][] { + {"a", "b"}, + {"", "x"} + }; Exception firstException = null; for (CSVFormat.Predefined format : CSVFormat.Predefined.values()) { try { StringWriter buf = new StringWriter(); try (CSVPrinter printer = new CSVPrinter(buf, format.getFormat())) { - printer.printRecord("a", "b"); // header - printer.printRecord("", "x"); // empty first column - } - try (CSVParser csvRecords = new CSVParser(new StringReader(buf.toString()), format.getFormat().builder().setHeader().build())) { - for (CSVRecord csvRecord : csvRecords) { - assertNotNull(csvRecord); + for (String[] line : lines) { + printer.printRecord((Object[]) line); } } + try (CSVParser csvRecords = new CSVParser(new StringReader(buf.toString()), format.getFormat())) { + for (String[] line : lines) { + assertArrayEquals(line, csvRecords.nextRecord().values()); + } + assertNull(csvRecords.nextRecord()); + } } catch (Exception | Error e) { Exception detailedException = new RuntimeException("format: " + format, e); if (firstException == null) {