[CSV-93] Allow the handling of NULL values. Use a single property 'nullString' for both input and output processing. No substitutions occur if null. For reading, nullString is used to convert field values to null. For writing, nullString is used to output the given string instead of the empty string.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1495911 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2013-06-24 03:06:05 +00:00
parent 530b038269
commit 5a0894f9e0
4 changed files with 92 additions and 40 deletions

View File

@ -63,7 +63,7 @@ public class CSVFormat implements Serializable {
private boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
private boolean ignoreEmptyLines;
private String recordSeparator; // for outputs
private String nullToString; // for outputs
private String nullString;
private String[] header;
/**
@ -75,7 +75,7 @@ public class CSVFormat implements Serializable {
*/
// package protected to give access without needing a synthetic accessor
CSVFormatBuilder(final char delimiter){
this(delimiter, null, null, null, null, false, false, null, Constants.EMPTY, null);
this(delimiter, null, null, null, null, false, false, null, null, null);
}
/**
@ -95,11 +95,12 @@ public class CSVFormat implements Serializable {
* <tt>true</tt> when whitespaces enclosing values should be ignored
* @param ignoreEmptyLines
* <tt>true</tt> when the parser should skip empty lines
* @param nullToString TODO
* @param header
* the header
* @param recordSeparator
* the record separator to use for output
* @param nullString
* the String to convert to and from {@code null}. No substitution occurs if {@code null}
* @param header
* the header
* @throws IllegalArgumentException if the delimiter is a line break character
*/
// package protected for use by test code
@ -107,7 +108,7 @@ public class CSVFormat implements Serializable {
final Quote quotePolicy, final Character commentStart,
final Character escape, final boolean ignoreSurroundingSpaces,
final boolean ignoreEmptyLines, final String recordSeparator,
final String nullToString, final String[] header) {
String nullString, final String[] header) {
if (isLineBreak(delimiter)) {
throw new IllegalArgumentException("The delimiter cannot be a line break");
}
@ -119,7 +120,7 @@ public class CSVFormat implements Serializable {
this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
this.ignoreEmptyLines = ignoreEmptyLines;
this.recordSeparator = recordSeparator;
this.nullToString = nullToString;
this.nullString = nullString;
this.header = header;
}
@ -135,7 +136,7 @@ public class CSVFormat implements Serializable {
this(format.delimiter, format.quoteChar, format.quotePolicy,
format.commentStart, format.escape,
format.ignoreSurroundingSpaces, format.ignoreEmptyLines,
format.recordSeparator, format.nullToString, format.header);
format.recordSeparator, format.nullString, format.header);
}
/**
@ -146,7 +147,8 @@ public class CSVFormat implements Serializable {
public CSVFormat build() {
validate();
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullToString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString,
header);
}
/**
@ -331,15 +333,22 @@ public class CSVFormat implements Serializable {
}
/**
* Sets the String to use for null values for output.
* Performs conversions to and from null for strings on input and output.
* <ul>
* <li>
* <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
* records.</li>
* <li>
* <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
* </ul>
*
* @param nullToString
* the String to use for null values for output.
* @param nullString
* the String to convert to and from {@code null}. No substitution occurs if {@code null}
*
* @return This builder with the the specified output record separator
* @return This builder with the specified null conversion string.
*/
public CSVFormatBuilder withNullToString(final String nullToString) {
this.nullToString = nullToString;
public CSVFormatBuilder withNullString(final String nullString) {
this.nullString = nullString;
return this;
}
@ -439,9 +448,9 @@ public class CSVFormat implements Serializable {
* @return a standard comma separated format builder, as for {@link #RFC4180} but allowing empty lines.
*/
public static CSVFormatBuilder newBuilder() {
return new CSVFormatBuilder(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, Constants.EMPTY,
null);
return new CSVFormatBuilder(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null);
}
private final char delimiter;
private final Character quoteChar;
private final Quote quotePolicy;
@ -449,11 +458,8 @@ public class CSVFormat implements Serializable {
private final Character escape;
private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
private final boolean ignoreEmptyLines;
private final String recordSeparator; // for outputs
private final String nullToString; // for outputs
private final String nullString;
private final String[] header;
/**
@ -588,8 +594,8 @@ public class CSVFormat implements Serializable {
* <tt>true</tt> when the parser should skip empty lines
* @param recordSeparator
* the line separator to use for output
* @param nullToString
* the String to use to write <code>null</code> values.
* @param nullString
* the String to convert to and from {@code null}. No substitution occurs if {@code null}
* @param header
* the header
* @throws IllegalArgumentException if the delimiter is a line break character
@ -599,7 +605,7 @@ public class CSVFormat implements Serializable {
final Quote quotePolicy, final Character commentStart,
final Character escape, final boolean ignoreSurroundingSpaces,
final boolean ignoreEmptyLines, final String recordSeparator,
final String nullToString, final String[] header) {
final String nullString, final String[] header) {
if (isLineBreak(delimiter)) {
throw new IllegalArgumentException("The delimiter cannot be a line break");
}
@ -611,7 +617,7 @@ public class CSVFormat implements Serializable {
this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
this.ignoreEmptyLines = ignoreEmptyLines;
this.recordSeparator = recordSeparator;
this.nullToString = nullToString;
this.nullString = nullString;
this.header = header == null ? null : header.clone();
}
@ -744,12 +750,20 @@ public class CSVFormat implements Serializable {
}
/**
* Returns the value to use for writing null values.
* Gets the String to convert to and from {@code null}.
* <ul>
* <li>
* <strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
* records.
* </li>
* <li>
* <strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
* </ul>
*
* @return the value to use for writing null values.
* @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
*/
public String getNullToString() {
return nullToString;
public String getNullString() {
return nullString;
}
/**

View File

@ -85,6 +85,7 @@ public class CSVParser implements Iterable<CSVRecord> {
private final Lexer lexer;
private final Map<String, Integer> headerMap;
private long recordNumber;
private final CSVFormat format;
// the following objects are shared to reduce garbage
@ -120,7 +121,8 @@ public class CSVParser implements Iterable<CSVRecord> {
*/
public CSVParser(final Reader input, final CSVFormat format) throws IOException {
this.lexer = new CSVLexer(format, new ExtendedBufferedReader(input));
this.headerMap = initializeHeader(format);
this.format = format;
this.headerMap = initializeHeader();
}
/**
@ -189,14 +191,14 @@ public class CSVParser implements Iterable<CSVRecord> {
lexer.nextToken(reusableToken);
switch (reusableToken.type) {
case TOKEN:
record.add(reusableToken.content.toString());
this.addRecordValue();
break;
case EORECORD:
record.add(reusableToken.content.toString());
this.addRecordValue();
break;
case EOF:
if (reusableToken.isReady) {
record.add(reusableToken.content.toString());
this.addRecordValue();
}
break;
case INVALID:
@ -221,6 +223,15 @@ public class CSVParser implements Iterable<CSVRecord> {
return result;
}
/**
 * Adds the content of the current token to the record being built, converting it to {@code null} when it is
 * equal to the format's null string.
 * <p>
 * The comparison is case-sensitive, matching the documented contract that only strings <em>equal to</em> the
 * null string are converted. For example, with a null string of {@code "NULL"} a field containing
 * {@code "null"} is kept as-is. When the format has no null string (the getter returns {@code null}), the
 * token content is always added unchanged.
 * </p>
 */
private void addRecordValue() {
    final String input = this.reusableToken.content.toString();
    final String nullString = this.format.getNullString();
    // equals(null) is false, so a format without a null string never converts the input.
    this.record.add(input.equals(nullString) ? null : input);
}
/**
* Parses the CSV input according to the given format and returns the content as an array of {@link CSVRecord}
* entries.
@ -243,7 +254,7 @@ public class CSVParser implements Iterable<CSVRecord> {
/**
* Initializes the name to index mapping if the format defines a header.
*/
private Map<String, Integer> initializeHeader(final CSVFormat format) throws IOException {
private Map<String, Integer> initializeHeader() throws IOException {
Map<String, Integer> hdrMap = null;
if (format.getHeader() != null) {
hdrMap = new LinkedHashMap<String, Integer>();

View File

@ -337,8 +337,14 @@ public class CSVPrinter implements Flushable, Closeable {
*/
public void print(final Object value) throws IOException {
// null values are written as the format's nullString, or as the empty string when no nullString is set
final String strValue = value == null ? format.getNullToString() : value.toString();
print(value, strValue, 0, strValue.length());
String strValue;
if (value == null) {
final String nullString = format.getNullString();
strValue = nullString == null ? Constants.EMPTY : nullString;
} else {
strValue = value.toString();
}
this.print(value, strValue, 0, strValue.length());
}
/**

View File

@ -18,14 +18,17 @@
package org.apache.commons.csv;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
@ -310,12 +313,30 @@ public class CSVPrinterTest {
@Test
public void testPrintCustomNullValues() throws IOException {
final StringWriter sw = new StringWriter();
final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.toBuilder().withNullToString("NULL").build());
final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.toBuilder().withNullString("NULL").build());
printer.printRecord("a", null, "b");
assertEquals("a,NULL,b" + recordSeparator, sw.toString());
printer.close();
}
/**
 * Round-trips a record containing a {@code null} value through a printer and a parser that share a format
 * whose null string is {@code "NULL"}: the value must be written as {@code NULL} and read back as {@code null}.
 */
@Test
public void testParseCustomNullValues() throws IOException {
    final CSVFormat nullAwareFormat = CSVFormat.DEFAULT.toBuilder().withNullString("NULL").build();

    // Write: the null value is expected to appear as the null string.
    final StringWriter out = new StringWriter();
    final CSVPrinter csvPrinter = new CSVPrinter(out, nullAwareFormat);
    csvPrinter.printRecord("a", null, "b");
    csvPrinter.close();
    final String written = out.toString();
    assertEquals("a,NULL,b" + recordSeparator, written);

    // Read: the null string is expected to come back as null.
    final Iterator<CSVRecord> records = nullAwareFormat.parse(new StringReader(written)).iterator();
    final CSVRecord first = records.next();
    assertEquals("a", first.get(0));
    assertEquals(null, first.get(1));
    assertEquals("b", first.get(2));
    assertFalse(records.hasNext());
}
@Test
public void testQuoteAll() throws IOException {
final StringWriter sw = new StringWriter();