From 5a0894f9e0ee9f4703b8db3f200ff4a507bf043b Mon Sep 17 00:00:00 2001 From: "Gary D. Gregory" Date: Mon, 24 Jun 2013 03:06:05 +0000 Subject: [PATCH] [CSV-93] Allow the handling of NULL values. Use a single property 'nullString' for both input and output processing. No substitutions occur if null. For reading, nullString is used to convert field values to null. For writing, nullString is used to output the given string instead of the empty string. git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1495911 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/commons/csv/CSVFormat.java | 78 +++++++++++-------- .../org/apache/commons/csv/CSVParser.java | 21 +++-- .../org/apache/commons/csv/CSVPrinter.java | 10 ++- .../apache/commons/csv/CSVPrinterTest.java | 23 +++++- 4 files changed, 92 insertions(+), 40 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 04e8cff9..5b507cf7 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -63,7 +63,7 @@ public class CSVFormat implements Serializable { private boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? 
private boolean ignoreEmptyLines; private String recordSeparator; // for outputs - private String nullToString; // for outputs + private String nullString; private String[] header; /** @@ -75,7 +75,7 @@ public class CSVFormat implements Serializable { */ // package protected to give access without needing a synthetic accessor CSVFormatBuilder(final char delimiter){ - this(delimiter, null, null, null, null, false, false, null, Constants.EMPTY, null); + this(delimiter, null, null, null, null, false, false, null, null, null); } /** @@ -95,11 +95,12 @@ public class CSVFormat implements Serializable { * true when whitespaces enclosing values should be ignored * @param ignoreEmptyLines * true when the parser should skip empty lines - * @param nullToString TODO - * @param header - * the header * @param recordSeparator * the record separator to use for output + * @param nullString + * the String to convert to and from {@code null}. No substitution occurs if {@code null} + * @param header + * the header * @throws IllegalArgumentException if the delimiter is a line break character */ // package protected for use by test code @@ -107,7 +108,7 @@ public class CSVFormat implements Serializable { final Quote quotePolicy, final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, - final String nullToString, final String[] header) { + String nullString, final String[] header) { if (isLineBreak(delimiter)) { throw new IllegalArgumentException("The delimiter cannot be a line break"); } @@ -119,7 +120,7 @@ public class CSVFormat implements Serializable { this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; this.ignoreEmptyLines = ignoreEmptyLines; this.recordSeparator = recordSeparator; - this.nullToString = nullToString; + this.nullString = nullString; this.header = header; } @@ -135,7 +136,7 @@ public class CSVFormat implements Serializable { this(format.delimiter, format.quoteChar, 
format.quotePolicy, format.commentStart, format.escape, format.ignoreSurroundingSpaces, format.ignoreEmptyLines, - format.recordSeparator, format.nullToString, format.header); + format.recordSeparator, format.nullString, format.header); } /** @@ -146,7 +147,8 @@ public class CSVFormat implements Serializable { public CSVFormat build() { validate(); return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullToString, header); + ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, + header); } /** @@ -331,15 +333,22 @@ public class CSVFormat implements Serializable { } /** - * Sets the String to use for null values for output. - * - * @param nullToString - * the String to use for null values for output. - * - * @return This builder with the the specified output record separator + * Performs conversions to and from null for strings on input and output. + * + * + * @param nullString + * the String to convert to and from {@code null}. No substitution occurs if {@code null} + * + * @return This builder with the specified null conversion string. */ - public CSVFormatBuilder withNullToString(final String nullToString) { - this.nullToString = nullToString; + public CSVFormatBuilder withNullString(final String nullString) { + this.nullString = nullString; return this; } @@ -439,9 +448,9 @@ public class CSVFormat implements Serializable { * @return a standard comma separated format builder, as for {@link #RFC4180} but allowing empty lines. 
*/ public static CSVFormatBuilder newBuilder() { - return new CSVFormatBuilder(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, Constants.EMPTY, - null); + return new CSVFormatBuilder(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null); } + private final char delimiter; private final Character quoteChar; private final Quote quotePolicy; @@ -449,11 +458,8 @@ public class CSVFormat implements Serializable { private final Character escape; private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? private final boolean ignoreEmptyLines; - private final String recordSeparator; // for outputs - - private final String nullToString; // for outputs - + private final String nullString; private final String[] header; /** @@ -588,8 +594,8 @@ public class CSVFormat implements Serializable { * true when the parser should skip empty lines * @param recordSeparator * the line separator to use for output - * @param nullToString - * the String to use to write null values. + * @param nullString + * the String to convert to and from {@code null}. No substitution occurs if {@code null} * @param header * the header * @throws IllegalArgumentException if the delimiter is a line break character @@ -599,7 +605,7 @@ public class CSVFormat implements Serializable { final Quote quotePolicy, final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, - final String nullToString, final String[] header) { + final String nullString, final String[] header) { if (isLineBreak(delimiter)) { throw new IllegalArgumentException("The delimiter cannot be a line break"); } @@ -611,7 +617,7 @@ public class CSVFormat implements Serializable { this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; this.ignoreEmptyLines = ignoreEmptyLines; this.recordSeparator = recordSeparator; - this.nullToString = nullToString; + this.nullString = nullString; this.header = header == null ? 
null : header.clone(); } @@ -744,12 +750,20 @@ public class CSVFormat implements Serializable { } /** - * Returns the value to use for writing null values. - * - * @return the value to use for writing null values. + * Gets the String to convert to and from {@code null}. + * + * + * @return the String to convert to and from {@code null}. No substitution occurs if {@code null} */ - public String getNullToString() { - return nullToString; + public String getNullString() { + return nullString; } /** diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 77f3ab31..846ea0ad 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -85,6 +85,7 @@ public class CSVParser implements Iterable { private final Lexer lexer; private final Map headerMap; private long recordNumber; + private final CSVFormat format; // the following objects are shared to reduce garbage @@ -120,7 +121,8 @@ public class CSVParser implements Iterable { */ public CSVParser(final Reader input, final CSVFormat format) throws IOException { this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input)); - this.headerMap = initializeHeader(format); + this.format = format; + this.headerMap = initializeHeader(); } /** @@ -189,14 +191,14 @@ public class CSVParser implements Iterable { lexer.nextToken(reusableToken); switch (reusableToken.type) { case TOKEN: - record.add(reusableToken.content.toString()); + this.addRecordValue(); break; case EORECORD: - record.add(reusableToken.content.toString()); + this.addRecordValue(); break; case EOF: if (reusableToken.isReady) { - record.add(reusableToken.content.toString()); + this.addRecordValue(); } break; case INVALID: @@ -221,6 +223,15 @@ public class CSVParser implements Iterable { return result; } + private void addRecordValue() { + final String input = reusableToken.content.toString(); + final String nullString = 
this.format.getNullString(); + if (nullString == null) { + record.add(input); + } else { + record.add(input.equalsIgnoreCase(nullString) ? null : input); + }} + /** * Parses the CSV input according to the given format and returns the content as an array of {@link CSVRecord} * entries. @@ -243,7 +254,7 @@ public class CSVParser implements Iterable { /** * Initializes the name to index mapping if the format defines a header. */ - private Map initializeHeader(final CSVFormat format) throws IOException { + private Map initializeHeader() throws IOException { Map hdrMap = null; if (format.getHeader() != null) { hdrMap = new LinkedHashMap(); diff --git a/src/main/java/org/apache/commons/csv/CSVPrinter.java b/src/main/java/org/apache/commons/csv/CSVPrinter.java index 8554d1ca..177d6400 100644 --- a/src/main/java/org/apache/commons/csv/CSVPrinter.java +++ b/src/main/java/org/apache/commons/csv/CSVPrinter.java @@ -337,8 +337,14 @@ public class CSVPrinter implements Flushable, Closeable { */ public void print(final Object value) throws IOException { // null values are considered empty - final String strValue = value == null ? format.getNullToString() : value.toString(); - print(value, strValue, 0, strValue.length()); + String strValue; + if (value == null) { + final String nullString = format.getNullString(); + strValue = nullString == null ? 
Constants.EMPTY : nullString; + } else { + strValue = value.toString(); + } + this.print(value, strValue, 0, strValue.length()); } /** diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java index c8501b8f..25f0ccaf 100644 --- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java +++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java @@ -18,14 +18,17 @@ package org.apache.commons.csv; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import java.io.IOException; +import java.io.StringReader; import java.io.StringWriter; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; import java.sql.Statement; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.Random; @@ -310,12 +313,30 @@ public class CSVPrinterTest { @Test public void testPrintCustomNullValues() throws IOException { final StringWriter sw = new StringWriter(); - final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.toBuilder().withNullToString("NULL").build()); + final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.toBuilder().withNullString("NULL").build()); printer.printRecord("a", null, "b"); assertEquals("a,NULL,b" + recordSeparator, sw.toString()); printer.close(); } + @Test + public void testParseCustomNullValues() throws IOException { + final StringWriter sw = new StringWriter(); + final CSVFormat format = CSVFormat.DEFAULT.toBuilder().withNullString("NULL").build(); + final CSVPrinter printer = new CSVPrinter(sw, format); + printer.printRecord("a", null, "b"); + printer.close(); + String csvString = sw.toString(); + assertEquals("a,NULL,b" + recordSeparator, csvString); + final Iterable iterable = format.parse(new StringReader(csvString)); + final Iterator iterator = iterable.iterator(); + final CSVRecord record = iterator.next(); + assertEquals("a", record.get(0)); + 
assertEquals(null, record.get(1)); + assertEquals("b", record.get(2)); + assertFalse(iterator.hasNext()); + } + @Test public void testQuoteAll() throws IOException { final StringWriter sw = new StringWriter();