Merge pull request #412 from ikfid/mongo_db_csv_empty_first_column

MongoDB CSV empty first column parsing fix
This commit is contained in:
Gary Gregory 2024-04-06 08:14:49 -04:00 committed by GitHub
commit 5770cc07e3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 47 additions and 12 deletions

View File

@ -334,18 +334,7 @@ final class Lexer implements Closeable {
while (true) { while (true) {
c = reader.read(); c = reader.read();
if (isEscape(c)) { if (isQuoteChar(c)) {
if (isEscapeDelimiter()) {
token.content.append(delimiter);
} else {
final int unescaped = readEscape();
if (unescaped == EOF) { // unexpected char after escape
token.content.append((char) c).append((char) reader.getLastChar());
} else {
token.content.append((char) unescaped);
}
}
} else if (isQuoteChar(c)) {
if (isQuoteChar(reader.lookAhead())) { if (isQuoteChar(reader.lookAhead())) {
// double or escaped encapsulator -> add single encapsulator to token // double or escaped encapsulator -> add single encapsulator to token
c = reader.read(); c = reader.read();
@ -376,6 +365,17 @@ final class Lexer implements Closeable {
} }
} }
} }
} else if (isEscape(c)) {
if (isEscapeDelimiter()) {
token.content.append(delimiter);
} else {
final int unescaped = readEscape();
if (unescaped == EOF) { // unexpected char after escape
token.content.append((char) c).append((char) reader.getLastChar());
} else {
token.content.append((char) unescaped);
}
}
} else if (isEndOfFile(c)) { } else if (isEndOfFile(c)) {
if (lenientEof) { if (lenientEof) {
token.type = Token.Type.EOF; token.type = Token.Type.EOF;

View File

@ -1562,6 +1562,41 @@ public class CSVParserTest {
assertEquals(3, record.size()); assertEquals(3, record.size());
}} }}
/**
 * Round-trips two records, the second of which has an empty first column, through
 * every {@code CSVFormat.Predefined} value: the records are written with a
 * {@code CSVPrinter} and then read back with a {@code CSVParser} using the same format.
 * <p>
 * A failure for one format does not stop the loop. Each failure is wrapped in a
 * {@code RuntimeException} naming the format; the first wrapper is thrown after all
 * formats have been tried, with the later wrappers attached as suppressed exceptions.
 * </p>
 *
 * @throws Exception the first recorded failure, if any format failed.
 */
@Test
public void testParsingPrintedEmptyFirstColumn() throws Exception {
    final String[][] expectedRows = {
        {"a", "b"},
        {"", "x"}
    };
    Exception collectedFailure = null;
    for (final CSVFormat.Predefined predefined : CSVFormat.Predefined.values()) {
        try {
            // Print phase: closing the printer before reading the buffer ensures everything is flushed.
            final StringWriter sink = new StringWriter();
            try (CSVPrinter csvPrinter = new CSVPrinter(sink, predefined.getFormat())) {
                for (final String[] row : expectedRows) {
                    csvPrinter.printRecord((Object[]) row);
                }
            }
            // Parse phase: every printed row must come back unchanged, followed by end of input.
            final String printed = sink.toString();
            try (CSVParser csvParser = new CSVParser(new StringReader(printed), predefined.getFormat())) {
                for (final String[] row : expectedRows) {
                    assertArrayEquals(row, csvParser.nextRecord().values());
                }
                assertNull(csvParser.nextRecord());
            }
        } catch (Exception | Error problem) {
            // Errors are caught too so that assertion failures are reported per format.
            final Exception wrapped = new RuntimeException("format: " + predefined, problem);
            if (collectedFailure != null) {
                collectedFailure.addSuppressed(wrapped);
            } else {
                collectedFailure = wrapped;
            }
        }
    }
    if (collectedFailure != null) {
        throw collectedFailure;
    }
}
private void validateLineNumbers(final String lineSeparator) throws IOException { private void validateLineNumbers(final String lineSeparator) throws IOException {
try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) { try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {
assertEquals(0, parser.getCurrentLineNumber()); assertEquals(0, parser.getCurrentLineNumber());