Merge pull request #265 from angusdev/CSV-290
CSV-290 - Fix the wrong assumptions in PostgreSQL formats
This commit is contained in:
commit
41a063d2cf
2
pom.xml
2
pom.xml
|
@ -260,6 +260,8 @@
|
|||
<exclude>src/test/resources/org/apache/commons/csv/CSV-259/sample.txt</exclude>
|
||||
<exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv</exclude>
|
||||
<exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt</exclude>
|
||||
<exclude>src/test/resources/org/apache/commons/csv/CSV-290/psql.csv</exclude>
|
||||
<exclude>src/test/resources/org/apache/commons/csv/CSV-290/psql.tsv</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
|
|
@ -1045,7 +1045,7 @@ public final class CSVFormat implements Serializable {
|
|||
*
|
||||
* <p>
|
||||
* This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
|
||||
* characters are escaped with {@code '"'}. The default NULL string is {@code ""}.
|
||||
* characters are not escaped. The default NULL string is {@code ""}.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
|
@ -1053,7 +1053,7 @@ public final class CSVFormat implements Serializable {
|
|||
* </p>
|
||||
* <ul>
|
||||
* <li>{@code setDelimiter(',')}</li>
|
||||
* <li>{@code setEscape('"')}</li>
|
||||
* <li>{@code setEscape(null)}</li>
|
||||
* <li>{@code setIgnoreEmptyLines(false)}</li>
|
||||
* <li>{@code setQuote('"')}</li>
|
||||
* <li>{@code setRecordSeparator('\n')}</li>
|
||||
|
@ -1069,7 +1069,7 @@ public final class CSVFormat implements Serializable {
|
|||
// @formatter:off
|
||||
public static final CSVFormat POSTGRESQL_CSV = DEFAULT.builder()
|
||||
.setDelimiter(COMMA)
|
||||
.setEscape(DOUBLE_QUOTE_CHAR)
|
||||
.setEscape(null)
|
||||
.setIgnoreEmptyLines(false)
|
||||
.setQuote(DOUBLE_QUOTE_CHAR)
|
||||
.setRecordSeparator(LF)
|
||||
|
@ -1082,8 +1082,8 @@ public final class CSVFormat implements Serializable {
|
|||
* Default PostgreSQL text format used by the {@code COPY} operation.
|
||||
*
|
||||
* <p>
|
||||
* This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special
|
||||
* characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}.
|
||||
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
|
||||
* characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
|
@ -1093,7 +1093,7 @@ public final class CSVFormat implements Serializable {
|
|||
* <li>{@code setDelimiter('\t')}</li>
|
||||
* <li>{@code setEscape('\\')}</li>
|
||||
* <li>{@code setIgnoreEmptyLines(false)}</li>
|
||||
* <li>{@code setQuote('"')}</li>
|
||||
* <li>{@code setQuote(null)}</li>
|
||||
* <li>{@code setRecordSeparator('\n')}</li>
|
||||
* <li>{@code setNullString("\\N")}</li>
|
||||
* <li>{@code setQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
|
||||
|
@ -1109,7 +1109,7 @@ public final class CSVFormat implements Serializable {
|
|||
.setDelimiter(TAB)
|
||||
.setEscape(BACKSLASH)
|
||||
.setIgnoreEmptyLines(false)
|
||||
.setQuote(DOUBLE_QUOTE_CHAR)
|
||||
.setQuote(null)
|
||||
.setRecordSeparator(LF)
|
||||
.setNullString("\\N")
|
||||
.setQuoteMode(QuoteMode.ALL_NON_NULL)
|
||||
|
|
|
@ -706,7 +706,7 @@ public class CSVFormatTest {
|
|||
assertFalse(csvFormat.getTrailingDelimiter());
|
||||
assertFalse(csvFormat.getTrim());
|
||||
|
||||
assertTrue(csvFormat.isQuoteCharacterSet());
|
||||
assertFalse(csvFormat.isQuoteCharacterSet());
|
||||
assertEquals("\\N", csvFormat.getNullString());
|
||||
|
||||
assertFalse(csvFormat.getIgnoreHeaderCase());
|
||||
|
@ -724,7 +724,7 @@ public class CSVFormatTest {
|
|||
assertEquals("\n", csvFormat.getRecordSeparator());
|
||||
assertFalse(csvFormat.getIgnoreEmptyLines());
|
||||
|
||||
assertEquals('\"', (char)csvFormat.getQuoteCharacter());
|
||||
assertNull(csvFormat.getQuoteCharacter());
|
||||
assertTrue(csvFormat.isNullStringSet());
|
||||
|
||||
assertEquals('\\', (char)csvFormat.getEscapeCharacter());
|
||||
|
@ -733,7 +733,7 @@ public class CSVFormatTest {
|
|||
assertFalse(csvFormat.getTrailingDelimiter());
|
||||
assertFalse(csvFormat.getTrim());
|
||||
|
||||
assertTrue(csvFormat.isQuoteCharacterSet());
|
||||
assertFalse(csvFormat.isQuoteCharacterSet());
|
||||
assertEquals("\\N", csvFormat.getNullString());
|
||||
|
||||
assertFalse(csvFormat.getIgnoreHeaderCase());
|
||||
|
@ -751,7 +751,7 @@ public class CSVFormatTest {
|
|||
assertEquals("\n", csvFormat.getRecordSeparator());
|
||||
assertFalse(csvFormat.getIgnoreEmptyLines());
|
||||
|
||||
assertEquals('\"', (char)csvFormat.getQuoteCharacter());
|
||||
assertNull(csvFormat.getQuoteCharacter());
|
||||
assertTrue(csvFormat.isNullStringSet());
|
||||
|
||||
Assertions.assertNotEquals(null, csvFormat);
|
||||
|
|
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv.issues;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;

import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.junit.jupiter.api.Test;
|
||||
|
||||
// psql (14.5 (Homebrew))
|
||||
//
|
||||
// create table COMMONS_CSV_PSQL_TEST (ID INTEGER, COL1 VARCHAR, COL2 VARCHAR, COL3 VARCHAR, COL4 VARCHAR);
|
||||
// insert into COMMONS_CSV_PSQL_TEST select 1, 'abc', 'test line 1' || chr(10) || 'test line 2', null, '';
|
||||
// insert into COMMONS_CSV_PSQL_TEST select 2, 'xyz', '\b:' || chr(8) || ' \t:' || chr(9) || ' \n:' || chr(10) || ' \r:' || chr(13), 'a', 'b';
|
||||
// insert into COMMONS_CSV_PSQL_TEST values (3, 'a', 'b,c,d', '"quoted"', 'e');
|
||||
// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.csv' WITH (FORMAT CSV);
|
||||
// copy COMMONS_CSV_PSQL_TEST TO '/tmp/psql.tsv';
|
||||
//
|
||||
// cat /tmp/psql.csv
|
||||
// 1,abc,"test line 1
|
||||
// test line 2",,""
|
||||
// 2,xyz,"\b:^H \t: \n:
|
||||
// \r:^M",a,b
|
||||
// 3,a,"b,c,d","""quoted""",e
|
||||
//
|
||||
// cat /tmp/psql.tsv
|
||||
// 1 abc test line 1\ntest line 2 \N
|
||||
// 2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b
|
||||
// 3 a b,c,d "quoted" e
|
||||
//
|
||||
public class JiraCsv290Test {
|
||||
private void testHelper(String filename, CSVFormat format) throws Exception {
|
||||
List<List<String>> content = new ArrayList<>();
|
||||
try (CSVParser csvParser = CSVParser.parse(new InputStreamReader(
|
||||
this.getClass().getResourceAsStream("/org/apache/commons/csv/CSV-290/" + filename)), format)) {
|
||||
for (CSVRecord csvRecord : csvParser) {
|
||||
List<String> row = new ArrayList<>();
|
||||
content.add(row);
|
||||
for (int i = 0; i < csvRecord.size(); i++) {
|
||||
row.add(csvRecord.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assertEquals(3, content.size());
|
||||
|
||||
assertEquals("1", content.get(0).get(0));
|
||||
assertEquals("abc", content.get(0).get(1));
|
||||
assertEquals("test line 1\ntest line 2", content.get(0).get(2)); // new line
|
||||
assertEquals(null, content.get(0).get(3)); // null
|
||||
assertEquals("", content.get(0).get(4));
|
||||
|
||||
assertEquals("2", content.get(1).get(0));
|
||||
assertEquals("\\b:\b \\t:\t \\n:\n \\r:\r", content.get(1).get(2)); // \b, \t, \n, \r
|
||||
|
||||
assertEquals("3", content.get(2).get(0));
|
||||
assertEquals("b,c,d", content.get(2).get(2)); // value has comma
|
||||
assertEquals("\"quoted\"", content.get(2).get(3)); // quoted
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPostgresqlCsv() throws Exception {
|
||||
testHelper("psql.csv", CSVFormat.POSTGRESQL_CSV);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPostgresqlText() throws Exception {
|
||||
testHelper("psql.tsv", CSVFormat.POSTGRESQL_TEXT);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteThenRead() throws Exception {
|
||||
StringWriter sw = new StringWriter();
|
||||
|
||||
CSVPrinter printer = new CSVPrinter(sw,
|
||||
CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).build());
|
||||
|
||||
printer.printRecord("column1", "column2");
|
||||
printer.printRecord("v11", "v12");
|
||||
printer.printRecord("v21", "v22");
|
||||
printer.close();
|
||||
|
||||
CSVParser parser = new CSVParser(new StringReader(sw.toString()),
|
||||
CSVFormat.POSTGRESQL_CSV.builder().setHeader().setSkipHeaderRecord(true).build());
|
||||
|
||||
assertArrayEquals(new Object[] { "column1", "column2" }, parser.getHeaderNames().toArray());
|
||||
|
||||
Iterator<CSVRecord> i = parser.iterator();
|
||||
assertArrayEquals(new String[] { "v11", "v12" }, i.next().toList().toArray());
|
||||
assertArrayEquals(new String[] { "v21", "v22" }, i.next().toList().toArray());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
1,abc,"test line 1
|
||||
test line 2",,""
|
||||
2,xyz,"\b: \t: \n:
|
||||
\r:
",a,b
|
||||
3,a,"b,c,d","""quoted""",e
|
|
|
@ -0,0 +1,3 @@
|
|||
1 abc test line 1\ntest line 2 \N
|
||||
2 xyz \\b:\b \\t:\t \\n:\n \\r:\r a b
|
||||
3 a b,c,d "quoted" e
|
|
Loading…
Reference in New Issue