[CSV-93] Allow the handling of NULL values. Use a single property 'nullString' for both input and output processing. No substitutions occur if null. For reading, nullString is used to convert field values to null. For writing, nullString is used to output the given string instead of the empty string.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1495911 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2013-06-24 03:06:05 +00:00
parent 530b038269
commit 5a0894f9e0
4 changed files with 92 additions and 40 deletions

View File

@ -63,7 +63,7 @@ public class CSVFormat implements Serializable {
private boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? private boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
private boolean ignoreEmptyLines; private boolean ignoreEmptyLines;
private String recordSeparator; // for outputs private String recordSeparator; // for outputs
private String nullToString; // for outputs private String nullString;
private String[] header; private String[] header;
/** /**
@ -75,7 +75,7 @@ public class CSVFormat implements Serializable {
*/ */
// package protected to give access without needing a synthetic accessor // package protected to give access without needing a synthetic accessor
CSVFormatBuilder(final char delimiter){ CSVFormatBuilder(final char delimiter){
this(delimiter, null, null, null, null, false, false, null, Constants.EMPTY, null); this(delimiter, null, null, null, null, false, false, null, null, null);
} }
/** /**
@ -95,11 +95,12 @@ public class CSVFormat implements Serializable {
* <tt>true</tt> when whitespaces enclosing values should be ignored * <tt>true</tt> when whitespaces enclosing values should be ignored
* @param ignoreEmptyLines * @param ignoreEmptyLines
* <tt>true</tt> when the parser should skip empty lines * <tt>true</tt> when the parser should skip empty lines
* @param nullToString TODO
* @param header
* the header
* @param recordSeparator * @param recordSeparator
* the record separator to use for output * the record separator to use for output
* @param nullString
* the String to convert to and from {@code null}. No substitution occurs if {@code null}
* @param header
* the header
* @throws IllegalArgumentException if the delimiter is a line break character * @throws IllegalArgumentException if the delimiter is a line break character
*/ */
// package protected for use by test code // package protected for use by test code
@ -107,7 +108,7 @@ public class CSVFormat implements Serializable {
final Quote quotePolicy, final Character commentStart, final Quote quotePolicy, final Character commentStart,
final Character escape, final boolean ignoreSurroundingSpaces, final Character escape, final boolean ignoreSurroundingSpaces,
final boolean ignoreEmptyLines, final String recordSeparator, final boolean ignoreEmptyLines, final String recordSeparator,
final String nullToString, final String[] header) { String nullString, final String[] header) {
if (isLineBreak(delimiter)) { if (isLineBreak(delimiter)) {
throw new IllegalArgumentException("The delimiter cannot be a line break"); throw new IllegalArgumentException("The delimiter cannot be a line break");
} }
@ -119,7 +120,7 @@ public class CSVFormat implements Serializable {
this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
this.ignoreEmptyLines = ignoreEmptyLines; this.ignoreEmptyLines = ignoreEmptyLines;
this.recordSeparator = recordSeparator; this.recordSeparator = recordSeparator;
this.nullToString = nullToString; this.nullString = nullString;
this.header = header; this.header = header;
} }
@ -135,7 +136,7 @@ public class CSVFormat implements Serializable {
this(format.delimiter, format.quoteChar, format.quotePolicy, this(format.delimiter, format.quoteChar, format.quotePolicy,
format.commentStart, format.escape, format.commentStart, format.escape,
format.ignoreSurroundingSpaces, format.ignoreEmptyLines, format.ignoreSurroundingSpaces, format.ignoreEmptyLines,
format.recordSeparator, format.nullToString, format.header); format.recordSeparator, format.nullString, format.header);
} }
/** /**
@ -146,7 +147,8 @@ public class CSVFormat implements Serializable {
public CSVFormat build() { public CSVFormat build() {
validate(); validate();
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape, return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullToString, header); ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString,
header);
} }
/** /**
@ -331,15 +333,22 @@ public class CSVFormat implements Serializable {
} }
/** /**
* Sets the String to use for null values for output. * Performs conversions to and from null for strings on input and output.
* * <ul>
* @param nullToString * <li>
* the String to use for null values for output. * <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
* * records.</li>
* @return This builder with the the specified output record separator * <li>
* <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
* </ul>
*
* @param nullString
* the String to convert to and from {@code null}. No substitution occurs if {@code null}
*
* @return This builder with the specified null conversion string.
*/ */
public CSVFormatBuilder withNullToString(final String nullToString) { public CSVFormatBuilder withNullString(final String nullString) {
this.nullToString = nullToString; this.nullString = nullString;
return this; return this;
} }
@ -439,9 +448,9 @@ public class CSVFormat implements Serializable {
* @return a standard comma separated format builder, as for {@link #RFC4180} but allowing empty lines. * @return a standard comma separated format builder, as for {@link #RFC4180} but allowing empty lines.
*/ */
public static CSVFormatBuilder newBuilder() { public static CSVFormatBuilder newBuilder() {
return new CSVFormatBuilder(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, Constants.EMPTY, return new CSVFormatBuilder(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null);
null);
} }
private final char delimiter; private final char delimiter;
private final Character quoteChar; private final Character quoteChar;
private final Quote quotePolicy; private final Quote quotePolicy;
@ -449,11 +458,8 @@ public class CSVFormat implements Serializable {
private final Character escape; private final Character escape;
private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
private final boolean ignoreEmptyLines; private final boolean ignoreEmptyLines;
private final String recordSeparator; // for outputs private final String recordSeparator; // for outputs
private final String nullString;
private final String nullToString; // for outputs
private final String[] header; private final String[] header;
/** /**
@ -588,8 +594,8 @@ public class CSVFormat implements Serializable {
* <tt>true</tt> when the parser should skip empty lines * <tt>true</tt> when the parser should skip empty lines
* @param recordSeparator * @param recordSeparator
* the line separator to use for output * the line separator to use for output
* @param nullToString * @param nullString
the String to use to write <code>null</code> values. * the String to convert to and from {@code null}. No substitution occurs if {@code null}
* @param header * @param header
* the header * the header
* @throws IllegalArgumentException if the delimiter is a line break character * @throws IllegalArgumentException if the delimiter is a line break character
@ -599,7 +605,7 @@ public class CSVFormat implements Serializable {
final Quote quotePolicy, final Character commentStart, final Quote quotePolicy, final Character commentStart,
final Character escape, final boolean ignoreSurroundingSpaces, final Character escape, final boolean ignoreSurroundingSpaces,
final boolean ignoreEmptyLines, final String recordSeparator, final boolean ignoreEmptyLines, final String recordSeparator,
final String nullToString, final String[] header) { final String nullString, final String[] header) {
if (isLineBreak(delimiter)) { if (isLineBreak(delimiter)) {
throw new IllegalArgumentException("The delimiter cannot be a line break"); throw new IllegalArgumentException("The delimiter cannot be a line break");
} }
@ -611,7 +617,7 @@ public class CSVFormat implements Serializable {
this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
this.ignoreEmptyLines = ignoreEmptyLines; this.ignoreEmptyLines = ignoreEmptyLines;
this.recordSeparator = recordSeparator; this.recordSeparator = recordSeparator;
this.nullToString = nullToString; this.nullString = nullString;
this.header = header == null ? null : header.clone(); this.header = header == null ? null : header.clone();
} }
@ -744,12 +750,20 @@ public class CSVFormat implements Serializable {
} }
/** /**
* Returns the value to use for writing null values. * Gets the String to convert to and from {@code null}.
* * <ul>
* @return the value to use for writing null values. * <li>
* <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
* records.
* </li>
* <li>
* <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
* </ul>
*
* @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
*/ */
public String getNullToString() { public String getNullString() {
return nullToString; return nullString;
} }
/** /**

View File

@ -85,6 +85,7 @@ public class CSVParser implements Iterable<CSVRecord> {
private final Lexer lexer; private final Lexer lexer;
private final Map<String, Integer> headerMap; private final Map<String, Integer> headerMap;
private long recordNumber; private long recordNumber;
private final CSVFormat format;
// the following objects are shared to reduce garbage // the following objects are shared to reduce garbage
@ -120,7 +121,8 @@ public class CSVParser implements Iterable<CSVRecord> {
*/ */
public CSVParser(final Reader input, final CSVFormat format) throws IOException { public CSVParser(final Reader input, final CSVFormat format) throws IOException {
this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input)); this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input));
this.headerMap = initializeHeader(format); this.format = format;
this.headerMap = initializeHeader();
} }
/** /**
@ -189,14 +191,14 @@ public class CSVParser implements Iterable<CSVRecord> {
lexer.nextToken(reusableToken); lexer.nextToken(reusableToken);
switch (reusableToken.type) { switch (reusableToken.type) {
case TOKEN: case TOKEN:
record.add(reusableToken.content.toString()); this.addRecordValue();
break; break;
case EORECORD: case EORECORD:
record.add(reusableToken.content.toString()); this.addRecordValue();
break; break;
case EOF: case EOF:
if (reusableToken.isReady) { if (reusableToken.isReady) {
record.add(reusableToken.content.toString()); this.addRecordValue();
} }
break; break;
case INVALID: case INVALID:
@ -221,6 +223,15 @@ public class CSVParser implements Iterable<CSVRecord> {
return result; return result;
} }
/**
 * Adds the content of the current token to the record being built, applying the null conversion configured
 * through {@link CSVFormat#getNullString()}.
 * <ul>
 * <li>If the format's null string is {@code null}, no substitution occurs and the token content is added as
 * is.</li>
 * <li>Otherwise, a token whose content is equal to the null string is added as {@code null}; any other content
 * is added unchanged.</li>
 * </ul>
 * The comparison is case-sensitive: only an exact match is converted, so a value that merely differs in case
 * from the null string (for example {@code "null"} when the null string is {@code "NULL"}) is kept as data.
 */
private void addRecordValue() {
    final String input = reusableToken.content.toString();
    final String nullString = this.format.getNullString();
    // A null nullString disables substitution; otherwise only an exact (case-sensitive) match becomes null.
    final boolean isNullValue = nullString != null && input.equals(nullString);
    record.add(isNullValue ? null : input);
}
/** /**
* Parses the CSV input according to the given format and returns the content as an array of {@link CSVRecord} * Parses the CSV input according to the given format and returns the content as an array of {@link CSVRecord}
* entries. * entries.
@ -243,7 +254,7 @@ public class CSVParser implements Iterable<CSVRecord> {
/** /**
* Initializes the name to index mapping if the format defines a header. * Initializes the name to index mapping if the format defines a header.
*/ */
private Map<String, Integer> initializeHeader(final CSVFormat format) throws IOException { private Map<String, Integer> initializeHeader() throws IOException {
Map<String, Integer> hdrMap = null; Map<String, Integer> hdrMap = null;
if (format.getHeader() != null) { if (format.getHeader() != null) {
hdrMap = new LinkedHashMap<String, Integer>(); hdrMap = new LinkedHashMap<String, Integer>();

View File

@ -337,8 +337,14 @@ public class CSVPrinter implements Flushable, Closeable {
*/ */
public void print(final Object value) throws IOException { public void print(final Object value) throws IOException {
// null values are considered empty // null values are considered empty
final String strValue = value == null ? format.getNullToString() : value.toString(); String strValue;
print(value, strValue, 0, strValue.length()); if (value == null) {
final String nullString = format.getNullString();
strValue = nullString == null ? Constants.EMPTY : nullString;
} else {
strValue = value.toString();
}
this.print(value, strValue, 0, strValue.length());
} }
/** /**

View File

@ -18,14 +18,17 @@
package org.apache.commons.csv; package org.apache.commons.csv;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter; import java.io.StringWriter;
import java.sql.Connection; import java.sql.Connection;
import java.sql.DriverManager; import java.sql.DriverManager;
import java.sql.SQLException; import java.sql.SQLException;
import java.sql.Statement; import java.sql.Statement;
import java.util.Arrays; import java.util.Arrays;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
@ -310,12 +313,30 @@ public class CSVPrinterTest {
@Test @Test
public void testPrintCustomNullValues() throws IOException { public void testPrintCustomNullValues() throws IOException {
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();
final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.toBuilder().withNullToString("NULL").build()); final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.toBuilder().withNullString("NULL").build());
printer.printRecord("a", null, "b"); printer.printRecord("a", null, "b");
assertEquals("a,NULL,b" + recordSeparator, sw.toString()); assertEquals("a,NULL,b" + recordSeparator, sw.toString());
printer.close(); printer.close();
} }
/**
 * Round-trips a record containing a {@code null} value through a printer and a parser that share one format
 * configured with the null string {@code "NULL"}: the value is written as {@code NULL} and read back as
 * {@code null}.
 */
@Test
public void testParseCustomNullValues() throws IOException {
    final CSVFormat nullAwareFormat = CSVFormat.DEFAULT.toBuilder().withNullString("NULL").build();

    // Writing: the null element must be emitted as the configured null string.
    final StringWriter out = new StringWriter();
    final CSVPrinter csvPrinter = new CSVPrinter(out, nullAwareFormat);
    csvPrinter.printRecord("a", null, "b");
    csvPrinter.close();
    final String written = out.toString();
    assertEquals("a,NULL,b" + recordSeparator, written);

    // Reading: the same text parsed with the same format must yield null in the middle column.
    final Iterator<CSVRecord> records = nullAwareFormat.parse(new StringReader(written)).iterator();
    final CSVRecord first = records.next();
    assertEquals("a", first.get(0));
    assertEquals(null, first.get(1));
    assertEquals("b", first.get(2));
    assertFalse(records.hasNext());
}
@Test @Test
public void testQuoteAll() throws IOException { public void testQuoteAll() throws IOException {
final StringWriter sw = new StringWriter(); final StringWriter sw = new StringWriter();