[CSV-216] Allow for mutable CSV records.

This commit is contained in:
Gary Gregory 2017-08-18 09:39:58 -06:00
parent 259812ec0a
commit b23f963e8d
6 changed files with 181 additions and 42 deletions

View File

@ -242,7 +242,7 @@ public final class CSVFormat implements Serializable {
* @see Predefined#Default
*/
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
null, null, null, false, false, false, false, false);
null, null, null, false, false, false, false, false, false);
/**
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
@ -537,7 +537,7 @@ public final class CSVFormat implements Serializable {
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
false, false, false);
false, false, false, false);
}
/**
@ -570,6 +570,8 @@ public final class CSVFormat implements Serializable {
private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
private final boolean mutableRecords; // Should the parser create CSVMutableRecord instead of CSVRecord instances?
private final String nullString; // the string to be used for null values
private final Character quoteCharacter; // null if quoting is disabled
@ -619,6 +621,7 @@ public final class CSVFormat implements Serializable {
* TODO
* @param trailingDelimiter
* TODO
* @param mutableRecords
* whether the parser creates {@link CSVMutableRecord} instances instead of {@link CSVRecord} instances
* @throws IllegalArgumentException
* if the delimiter is a line break character
*/
@ -627,7 +630,7 @@ public final class CSVFormat implements Serializable {
final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
final boolean trailingDelimiter) {
final boolean trailingDelimiter, boolean mutableRecords) {
this.delimiter = delimiter;
this.quoteCharacter = quoteChar;
this.quoteMode = quoteMode;
@ -644,6 +647,7 @@ public final class CSVFormat implements Serializable {
this.ignoreHeaderCase = ignoreHeaderCase;
this.trailingDelimiter = trailingDelimiter;
this.trim = trim;
this.mutableRecords = mutableRecords;
validate();
}
@ -927,6 +931,10 @@ public final class CSVFormat implements Serializable {
return escapeCharacter != null;
}
/**
 * Returns whether parsing with this format produces mutable records.
 *
 * @return {@code true} if the parser creates {@link CSVMutableRecord} instances, {@code false} if it creates
 *         {@link CSVRecord} instances.
 */
public boolean isMutableRecords() {
    return this.mutableRecords;
}
/**
* Returns whether a nullString has been defined.
*
@ -1431,7 +1439,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1466,7 +1474,7 @@ public final class CSVFormat implements Serializable {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1484,7 +1492,7 @@ public final class CSVFormat implements Serializable {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1515,7 +1523,7 @@ public final class CSVFormat implements Serializable {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1670,7 +1678,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withHeader(final String... header) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1691,7 +1699,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withHeaderComments(final Object... headerComments) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1716,7 +1724,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1742,7 +1750,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1767,7 +1775,25 @@ public final class CSVFormat implements Serializable {
public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
* Returns a new {@code CSVFormat} that controls whether parsing generates {@link CSVRecord} or
* {@link CSVMutableRecord} instances.
* <ul>
* <li><strong>Reading:</strong> Whether to generate CSVRecord or CSVMutableRecord.</li>
* <li><strong>Writing:</strong> No effect.</li>
* </ul>
*
* @param mutableRecords
* {@code true} to generate CSVMutableRecord instances, {@code false} to generate CSVRecord instances
*
* @return A new CSVFormat that is equal to this but with the specified mutable-records setting.
*/
public CSVFormat withMutableRecords(final boolean mutableRecords) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1786,7 +1812,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withNullString(final String nullString) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1817,7 +1843,7 @@ public final class CSVFormat implements Serializable {
}
return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1831,7 +1857,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1869,7 +1895,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withRecordSeparator(final String recordSeparator) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1896,7 +1922,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1921,7 +1947,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
/**
@ -1946,6 +1972,7 @@ public final class CSVFormat implements Serializable {
public CSVFormat withTrim(final boolean trim) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter);
skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, mutableRecords);
}
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.util.Map;
/**
 * A {@link CSVRecord} whose values can be replaced after the record has been created.
 * <p>
 * The class only widens the two package-private {@code put} methods of {@link CSVRecord} to {@code public}; both
 * overrides delegate to the superclass unchanged.
 * </p>
 */
public final class CSVMutableRecord extends CSVRecord {

    private static final long serialVersionUID = 1L;

    /**
     * Creates a mutable record by forwarding every argument to the {@link CSVRecord} constructor.
     *
     * @param values
     *            the values of this record
     * @param mapping
     *            the header name to column index mapping, may be {@code null}
     * @param comment
     *            the comment attached to this record, may be {@code null}
     * @param recordNumber
     *            the number of this record
     * @param characterPosition
     *            the character position at which this record starts
     */
    CSVMutableRecord(final String[] values, final Map<String, Integer> mapping, final String comment,
            final long recordNumber, final long characterPosition) {
        super(values, mapping, comment, recordNumber, characterPosition);
    }

    /**
     * Replaces the value stored at the given column index.
     *
     * @param index
     *            the zero-based column index
     * @param value
     *            the new value for that column
     */
    @Override
    public void put(final int index, final String value) {
        super.put(index, value);
    }

    /**
     * Replaces the value of the column with the given header name.
     *
     * @param name
     *            the header name of the column
     * @param value
     *            the new value for that column
     * @throws IllegalArgumentException
     *             if the header mapping has no entry for {@code name}
     */
    @Override
    public void put(final String name, final String value) {
        super.put(name, value);
    }
}

View File

@ -300,7 +300,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
private final long characterOffset;
private final Token reusableToken = new Token();
/**
* Customized CSV parser using the given {@link CSVFormat}
*
@ -614,8 +614,10 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
if (!this.recordList.isEmpty()) {
this.recordNumber++;
final String comment = sb == null ? null : sb.toString();
result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap, comment,
this.recordNumber, startCharPosition);
String[] array = this.recordList.toArray(new String[this.recordList.size()]);
result = format.isMutableRecords()
? new CSVMutableRecord(array, this.headerMap, comment, this.recordNumber, startCharPosition)
: new CSVRecord(array, this.headerMap, comment, this.recordNumber, startCharPosition);
}
return result;
}

View File

@ -28,7 +28,7 @@ import java.util.Map.Entry;
/**
* A CSV record parsed from a CSV file.
*/
public final class CSVRecord implements Serializable, Iterable<String> {
public class CSVRecord implements Serializable, Iterable<String> {
private static final String[] EMPTY_STRING_ARRAY = new String[0];
@ -95,22 +95,28 @@ public final class CSVRecord implements Serializable, Iterable<String> {
public String get(final String name) {
if (mapping == null) {
throw new IllegalStateException(
"No header mapping was specified, the record values can't be accessed by name");
}
final Integer index = mapping.get(name);
if (index == null) {
throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
mapping.keySet()));
"No header mapping was specified, the record values can't be accessed by name");
}
final int intIndex = getIndex(name);
try {
return values[index.intValue()];
return values[intIndex];
} catch (final ArrayIndexOutOfBoundsException e) {
throw new IllegalArgumentException(String.format(
"Index for header '%s' is %d but CSVRecord only has %d values!", name, index,
Integer.valueOf(values.length)));
throw new IllegalArgumentException(
String.format("Index for header '%s' is %d but CSVRecord only has %d values!", name, intIndex,
Integer.valueOf(values.length)));
}
}
/**
 * Resolves a header name to the index of its column in this record.
 *
 * @param name
 *            the header name to look up
 * @return the column index mapped to {@code name}
 * @throws IllegalStateException
 *             if no header mapping was specified for this record
 * @throws IllegalArgumentException
 *             if the header mapping has no entry for {@code name}
 */
int getIndex(final String name) {
    // put(String, String) reaches this method without the null check that get(String) performs, so guard here
    // to fail with the same exception as get(String) instead of a NullPointerException.
    if (mapping == null) {
        throw new IllegalStateException(
                "No header mapping was specified, the record values can't be accessed by name");
    }
    final Integer integerIndex = mapping.get(name);
    if (integerIndex == null) {
        throw new IllegalArgumentException(
                String.format("Mapping for %s not found, expected one of %s", name, mapping.keySet()));
    }
    return integerIndex.intValue();
}
/**
* Returns the start position of this record as a character position in the source stream. This may or may not
* correspond to the byte position depending on the character set.
@ -207,6 +213,14 @@ public final class CSVRecord implements Serializable, Iterable<String> {
return toList().iterator();
}
/**
 * Stores {@code value} at the given position of this record's value array.
 *
 * @param index
 *            the zero-based column index
 * @param value
 *            the value to store
 */
void put(final int index, final String value) {
    this.values[index] = value;
}
/**
 * Stores {@code value} in the column that the header mapping associates with {@code name}.
 *
 * @param name
 *            the header name of the column
 * @param value
 *            the value to store
 * @throws IllegalArgumentException
 *             if the header mapping has no entry for {@code name}
 */
void put(final String name, final String value) {
    final int column = getIndex(name);
    values[column] = value;
}
/**
* Puts all values of this record into the given Map.
*

View File

@ -0,0 +1,32 @@
package org.apache.commons.csv;
import org.junit.Assert;
/**
 * Runs the inherited {@link CSVRecordTest} cases against {@link CSVMutableRecord} by overriding the factory and
 * validation hooks of the parent test.
 */
public class CSVMutableRecordTest extends CSVRecordTest {

    /** Returns the parent's comma format with mutable records switched on. */
    @Override
    protected CSVFormat createCommaFormat() {
        final CSVFormat parentFormat = super.createCommaFormat();
        return parentFormat.withMutableRecords(true);
    }

    /** Returns the parent's default format with mutable records switched on. */
    @Override
    protected CSVFormat createDefaultFormat() {
        final CSVFormat parentFormat = super.createDefaultFormat();
        return parentFormat.withMutableRecords(true);
    }

    /** Creates the record under test without a header mapping. */
    @Override
    protected CSVRecord newRecord() {
        return new CSVMutableRecord(values, null, null, 0, -1);
    }

    /** Creates the record under test backed by the shared header mapping. */
    @Override
    protected CSVRecord newRecordWithHeader() {
        return new CSVMutableRecord(values, header, null, 0, -1);
    }

    /** Asserts that the given record is exactly a {@link CSVMutableRecord}. */
    @Override
    protected void validate(final CSVRecord anyRecord) {
        final Class<?> actualType = anyRecord.getClass();
        Assert.assertEquals(CSVMutableRecord.class, actualType);
    }
}

View File

@ -38,19 +38,32 @@ public class CSVRecordTest {
private enum EnumFixture { UNKNOWN_COLUMN }
private String[] values;
private CSVRecord record, recordWithHeader;
private Map<String, Integer> header;
protected String[] values;
protected CSVRecord record, recordWithHeader;
protected Map<String, Integer> header;
@Before
public void setUp() throws Exception {
values = new String[] { "A", "B", "C" };
record = new CSVRecord(values, null, null, 0, -1);
record = newRecord();
header = new HashMap<>();
header.put("first", Integer.valueOf(0));
header.put("second", Integer.valueOf(1));
header.put("third", Integer.valueOf(2));
recordWithHeader = new CSVRecord(values, header, null, 0, -1);
recordWithHeader = newRecordWithHeader();
validate(recordWithHeader);
}
/**
 * Creates the record under test without a header mapping; subclasses override this to supply another record type.
 *
 * @return a new record backed by {@code values}
 */
protected CSVRecord newRecord() {
    final Map<String, Integer> noMapping = null;
    final String noComment = null;
    return new CSVRecord(values, noMapping, noComment, 0, -1);
}
/**
 * Asserts that the given record has exactly the runtime type this test class is meant to exercise.
 *
 * @param anyRecord
 *            the record whose class is checked
 */
protected void validate(final CSVRecord anyRecord) {
    final Class<?> actualType = anyRecord.getClass();
    Assert.assertEquals(CSVRecord.class, actualType);
}
/**
 * Creates the record under test with the shared header mapping; subclasses override this to supply another record
 * type.
 *
 * @return a new record backed by {@code values} and {@code header}
 */
protected CSVRecord newRecordWithHeader() {
    final String noComment = null;
    return new CSVRecord(values, header, noComment, 0, -1);
}
@Test
@ -143,7 +156,7 @@ public class CSVRecordTest {
@Test
public void testRemoveAndAddColumns() throws IOException {
// do:
try (final CSVPrinter printer = new CSVPrinter(new StringBuilder(), CSVFormat.DEFAULT)) {
try (final CSVPrinter printer = new CSVPrinter(new StringBuilder(), createDefaultFormat())) {
final Map<String, String> map = recordWithHeader.toMap();
map.remove("OldColumn");
map.put("ZColumn", "NewValue");
@ -151,10 +164,14 @@ public class CSVRecordTest {
final ArrayList<String> list = new ArrayList<>(map.values());
Collections.sort(list);
printer.printRecord(list);
Assert.assertEquals("A,B,C,NewValue" + CSVFormat.DEFAULT.getRecordSeparator(), printer.getOut().toString());
Assert.assertEquals("A,B,C,NewValue" + createDefaultFormat().getRecordSeparator(), printer.getOut().toString());
}
}
/**
* Supplies the format for the tests that print or parse with default settings; subclasses may override it.
*
* @return {@link CSVFormat#DEFAULT}
*/
protected CSVFormat createDefaultFormat() {
return CSVFormat.DEFAULT;
}
@Test
public void testToMap() {
final Map<String, String> map = this.recordWithHeader.toMap();
@ -163,22 +180,28 @@ public class CSVRecordTest {
@Test
public void testToMapWithShortRecord() throws Exception {
try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
try (final CSVParser parser = CSVParser.parse("a,b", createDefaultFormat().withHeader("A", "B", "C"))) {
final CSVRecord shortRec = parser.iterator().next();
validate(shortRec);
shortRec.toMap();
}
}
@Test
public void testToMapWithNoHeader() throws Exception {
try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.newFormat(','))) {
try (final CSVParser parser = CSVParser.parse("a,b", createCommaFormat())) {
final CSVRecord shortRec = parser.iterator().next();
validate(shortRec);
final Map<String, String> map = shortRec.toMap();
assertNotNull("Map is not null.", map);
assertTrue("Map is empty.", map.isEmpty());
}
}
/**
* Supplies the comma-delimited format created through {@code CSVFormat.newFormat}; subclasses may override it.
*
* @return a new format that uses {@code ','} as its delimiter
*/
protected CSVFormat createCommaFormat() {
return CSVFormat.newFormat(',');
}
private void validateMap(final Map<String, String> map, final boolean allowsNulls) {
assertTrue(map.containsKey("first"));
assertTrue(map.containsKey("second"));