Merge pull request #265 from angusdev/CSV-290

CSV-290 - Fix the wrong assumptions in PostgreSQL formats
This commit is contained in:
Gary Gregory 2022-10-15 09:34:12 -04:00 committed by GitHub
commit 41a063d2cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 139 additions and 11 deletions

View File

@ -260,6 +260,8 @@
<exclude>src/test/resources/org/apache/commons/csv/CSV-259/sample.txt</exclude>
<exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv</exclude>
<exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt</exclude>
<exclude>src/test/resources/org/apache/commons/csv/CSV-290/psql.csv</exclude>
<exclude>src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv</exclude>
</excludes>
</configuration>
</plugin>

View File

@ -1045,7 +1045,7 @@ public final class CSVFormat implements Serializable {
*
* <p>
* This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
* characters are escaped with {@code '"'}. The default NULL string is {@code ""}.
* characters are not escaped. The default NULL string is {@code ""}.
* </p>
*
* <p>
@ -1053,7 +1053,7 @@ public final class CSVFormat implements Serializable {
* </p>
* <ul>
* <li>{@code setDelimiter(',')}</li>
* <li>{@code setEscape('"')}</li>
* <li>{@code setEscape(null)}</li>
* <li>{@code setIgnoreEmptyLines(false)}</li>
* <li>{@code setQuote('"')}</li>
* <li>{@code setRecordSeparator('\n')}</li>
@ -1069,7 +1069,7 @@ public final class CSVFormat implements Serializable {
// @formatter:off
public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder()
.setDelimiter(COMMA)
.setEscape(DOUBLE_QUOTE_CHAR)
.setEscape(null)
.setIgnoreEmptyLines(false)
.setQuote(DOUBLE_QUOTE_CHAR)
.setRecordSeparator(LF)
@ -1082,8 +1082,8 @@ public final class CSVFormat implements Serializable {
* Default PostgreSQL text format used by the {@code COPY} operation.
*
* <p>
* This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special
* characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}.
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
* characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
* </p>
*
* <p>
@ -1093,7 +1093,7 @@ public final class CSVFormat implements Serializable {
* <li>{@code setDelimiter('\t')}</li>
* <li>{@code setEscape('\\')}</li>
* <li>{@code setIgnoreEmptyLines(false)}</li>
* <li>{@code setQuote('"')}</li>
* <li>{@code setQuote(null)}</li>
* <li>{@code setRecordSeparator('\n')}</li>
* <li>{@code setNullString("\\N")}</li>
* <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
@ -1109,7 +1109,7 @@ public final class CSVFormat implements Serializable {
.setDelimiter(TAB)
.setEscape(BACKSLASH)
.setIgnoreEmptyLines(false)
.setQuote(DOUBLE_QUOTE_CHAR)
.setQuote(null)
.setRecordSeparator(LF)
.setNullString("\\N")
.setQuoteMode(QuoteMode.ALL_NON_NULL)

View File

@ -706,7 +706,7 @@ public class CSVFormatTest {
assertFalse(csvFormat.getTrailingDelimiter());
assertFalse(csvFormat.getTrim());
assertTrue(csvFormat.isQuoteCharacterSet());
assertFalse(csvFormat.isQuoteCharacterSet());
assertEquals("\\N", csvFormat.getNullString());
assertFalse(csvFormat.getIgnoreHeaderCase());
@ -724,7 +724,7 @@ public class CSVFormatTest {
assertEquals("\n", csvFormat.getRecordSeparator());
assertFalse(csvFormat.getIgnoreEmptyLines());
assertEquals('\"', (char)csvFormat.getQuoteCharacter());
assertNull(csvFormat.getQuoteCharacter());
assertTrue(csvFormat.isNullStringSet());
assertEquals('\\', (char)csvFormat.getEscapeCharacter());
@ -733,7 +733,7 @@ public class CSVFormatTest {
assertFalse(csvFormat.getTrailingDelimiter());
assertFalse(csvFormat.getTrim());
assertTrue(csvFormat.isQuoteCharacterSet());
assertFalse(csvFormat.isQuoteCharacterSet());
assertEquals("\\N", csvFormat.getNullString());
assertFalse(csvFormat.getIgnoreHeaderCase());
@ -751,7 +751,7 @@ public class CSVFormatTest {
assertEquals("\n", csvFormat.getRecordSeparator());
assertFalse(csvFormat.getIgnoreEmptyLines());
assertEquals('\"', (char)csvFormat.getQuoteCharacter());
assertNull(csvFormat.getQuoteCharacter());
assertTrue(csvFormat.isNullStringSet());
Assertions.assertNotEquals(null, csvFormat);

View File

@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv.issues;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.junit.jupiter.api.Test;
// The fixture files psql.csv and psql.tsv were generated with psql (14.5 (Homebrew)) as follows:
//
// create table COMMONS_CSV_PSQL_TEST (ID INTEGER, COL1 VARCHAR, COL2 VARCHAR, COL3 VARCHAR, COL4 VARCHAR);
// insert into COMMONS_CSV_PSQL_TEST select 1, 'abc', 'test line 1' || chr(10) || 'test line 2', null, '';
// insert into COMMONS_CSV_PSQL_TEST select 2, 'xyz', '\b:' || chr(8) || ' \t:' || chr(9) || ' \n:' || chr(10) || ' \r:' || chr(13), 'a', 'b';
// insert into COMMONS_CSV_PSQL_TEST values (3, 'a', 'b,c,d', '"quoted"', 'e');
// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.csv' WITH (FORMAT CSV);
// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.tsv';
//
// cat /tmp/psql.csv
// 1,abc,"test line 1
// test line 2",,""
// 2,xyz,"\b:^H \t: \n:
// \r:^M",a,b
// 3,a,"b,c,d","""quoted""",e
//
// cat /tmp/psql.tsv
// 1 abc test line 1\ntest line 2 \N
// 2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b
// 3 a b,c,d "quoted" e
//
/**
 * Tests for CSV-290: checks that {@link CSVFormat#POSTGRESQL_CSV} and {@link CSVFormat#POSTGRESQL_TEXT}
 * parse the {@code psql.csv} and {@code psql.tsv} fixture files to the same logical content.
 */
public class JiraCsv290Test {

    /**
     * Parses a fixture from {@code /org/apache/commons/csv/CSV-290/} with the given format and asserts
     * the expected cell values.
     *
     * @param filename the fixture file name, relative to the CSV-290 resource directory.
     * @param format the format used to parse the fixture.
     * @throws Exception if the fixture cannot be read or parsed.
     */
    private void testHelper(String filename, CSVFormat format) throws Exception {
        List<List<String>> content = new ArrayList<>();
        // Decode with an explicit charset so the result does not depend on the platform default.
        try (CSVParser csvParser = CSVParser.parse(new InputStreamReader(
                this.getClass().getResourceAsStream("/org/apache/commons/csv/CSV-290/" + filename),
                StandardCharsets.UTF_8), format)) {
            for (CSVRecord csvRecord : csvParser) {
                List<String> row = new ArrayList<>();
                content.add(row);
                for (String value : csvRecord) {
                    row.add(value);
                }
            }
        }

        assertEquals(3, content.size());

        // Record 1: embedded line break, null value and empty string.
        assertEquals("1", content.get(0).get(0));
        assertEquals("abc", content.get(0).get(1));
        assertEquals("test line 1\ntest line 2", content.get(0).get(2)); // new line
        assertEquals(null, content.get(0).get(3)); // null
        assertEquals("", content.get(0).get(4));

        // Record 2: literal backslash sequences next to the actual control characters.
        assertEquals("2", content.get(1).get(0));
        assertEquals("\\b:\b \\t:\t \\n:\n \\r:\r", content.get(1).get(2)); // \b, \t, \n, \r

        // Record 3: delimiter and quote characters inside values.
        assertEquals("3", content.get(2).get(0));
        assertEquals("b,c,d", content.get(2).get(2)); // value has comma
        assertEquals("\"quoted\"", content.get(2).get(3)); // quoted
    }

    /**
     * Parses the {@code psql.csv} fixture with {@link CSVFormat#POSTGRESQL_CSV}.
     *
     * @throws Exception if the fixture cannot be read or parsed.
     */
    @Test
    public void testPostgresqlCsv() throws Exception {
        testHelper("psql.csv", CSVFormat.POSTGRESQL_CSV);
    }

    /**
     * Parses the {@code psql.tsv} fixture with {@link CSVFormat#POSTGRESQL_TEXT}.
     *
     * @throws Exception if the fixture cannot be read or parsed.
     */
    @Test
    public void testPostgresqlText() throws Exception {
        testHelper("psql.tsv", CSVFormat.POSTGRESQL_TEXT);
    }

    /**
     * Prints three records with a header-aware {@link CSVFormat#POSTGRESQL_CSV} variant and reads them
     * back, expecting the first record to be reported as the header names.
     *
     * @throws Exception if printing or parsing fails.
     */
    @Test
    public void testWriteThenRead() throws Exception {
        StringWriter sw = new StringWriter();
        CSVFormat format = CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).build();

        // try-with-resources closes the printer even if a printRecord call fails.
        try (CSVPrinter printer = new CSVPrinter(sw, format)) {
            printer.printRecord("column1", "column2");
            printer.printRecord("v11", "v12");
            printer.printRecord("v21", "v22");
        }

        // The parser is closed as well; previously it was never released.
        try (CSVParser parser = new CSVParser(new StringReader(sw.toString()), format)) {
            assertArrayEquals(new Object[] { "column1", "column2" }, parser.getHeaderNames().toArray());

            Iterator<CSVRecord> i = parser.iterator();
            assertArrayEquals(new String[] { "v11", "v12" }, i.next().toList().toArray());
            assertArrayEquals(new String[] { "v21", "v22" }, i.next().toList().toArray());
        }
    }
}

View File

@ -0,0 +1,5 @@
1,abc,"test line 1
test line 2",,""
2,xyz,"\b: \t: \n:
\r: ",a,b
3,a,"b,c,d","""quoted""",e
1 1 abc test line 1 test line 2
2 2 xyz \b: \t: \n: \r: a b
3 3 a b,c,d "quoted" e

View File

@ -0,0 +1,3 @@
1 abc test line 1\ntest line 2 \N
2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b
3 a b,c,d "quoted" e
1 1 abc test line 1\ntest line 2 \N
2 2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b
3 3 a b,c,d quoted e