[CSV-219] Quote character usage differs from Excel's behavior when the first string contains CJK char(s).
[CSV-172] Don't quote cells just because they have UTF-8 encoded
characters.
This commit is contained in:
Gary Gregory 2017-12-11 11:16:01 -07:00
parent e76c4d809c
commit 8b3de71fd9
3 changed files with 14 additions and 6 deletions

View File

@ -40,6 +40,8 @@
<body> <body>
<release version="1.6" date="2017-MM-DD" description="Feature and bug fix release"> <release version="1.6" date="2017-MM-DD" description="Feature and bug fix release">
<action issue="CSV-217" type="add" dev="ggregory" due-to="Korolyov Alexei">Add autoFlush option for CsvPrinter. PR #24.</action> <action issue="CSV-217" type="add" dev="ggregory" due-to="Korolyov Alexei">Add autoFlush option for CsvPrinter. PR #24.</action>
<action issue="CSV-219" type="fix" dev="ggregory" due-to="Zhang Hongda">The behavior of quote char using is not similar as Excel does when the first string contains CJK char(s).</action>
<action issue="CSV-172" type="fix" dev="ggregory" due-to="Andrew Pennebaker">Don't quote cells just because they have UTF-8 encoded characters.</action>
</release> </release>
<release version="1.5" date="2017-09-03" description="Feature and bug fix release"> <release version="1.5" date="2017-09-03" description="Feature and bug fix release">
<action issue="CSV-203" type="fix" dev="ggregory" due-to="Richard Wheeldon, Kai Paroth">withNullString value is printed without quotes when QuoteMode.ALL is specified; add QuoteMode.ALL_NON_NULL. PR #17.</action> <action issue="CSV-203" type="fix" dev="ggregory" due-to="Richard Wheeldon, Kai Paroth">withNullString value is printed without quotes when QuoteMode.ALL is specified; add QuoteMode.ALL_NON_NULL. PR #17.</action>

View File

@ -1186,10 +1186,7 @@ public final class CSVFormat implements Serializable {
} else { } else {
char c = value.charAt(pos); char c = value.charAt(pos);
// RFC4180 (https://tools.ietf.org/html/rfc4180) TEXTDATA = %x20-21 / %x23-2B / %x2D-7E if (c <= COMMENT) {
if (newRecord && (c < 0x20 || c > 0x21 && c < 0x23 || c > 0x2B && c < 0x2D || c > 0x7E)) {
quote = true;
} else if (c <= COMMENT) {
// Some other chars at the start of a value caused the parser to fail, so for now // Some other chars at the start of a value caused the parser to fail, so for now
// encapsulate if we start in anything less than '#'. We are being conservative // encapsulate if we start in anything less than '#'. We are being conservative
// by including the default comment char too. // by including the default comment char too.

View File

@ -1033,11 +1033,20 @@ public class CSVPrinterTest {
} }
@Test @Test
public void testRfc4180QuoteSingleChar() throws IOException { public void testDontQuoteEuroFirstChar() throws IOException {
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();
try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) { try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) {
printer.printRecord(EURO_CH, "Deux"); printer.printRecord(EURO_CH, "Deux");
assertEquals("\"" + EURO_CH + "\",Deux" + recordSeparator, sw.toString()); assertEquals(EURO_CH + ",Deux" + recordSeparator, sw.toString());
}
}
@Test
public void testQuoteCommaFirstChar() throws IOException {
final StringWriter sw = new StringWriter();
try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.RFC4180)) {
printer.printRecord(",");
assertEquals("\",\"" + recordSeparator, sw.toString());
} }
} }