[CSV-239] Cannot get headers in column order from CSVRecord.
This commit is contained in:
parent
d8d5de6476
commit
788f2aaa7a
|
@ -45,6 +45,7 @@
|
|||
<action issue="CSV-234" type="add" dev="ggregory" due-to="Roberto Benedetti, Gary Gregory">Add support for java.sql.Clob.</action>
|
||||
<action issue="CSV-237" type="update" dev="ggregory" due-to="Gary Gregory">Update to Java 8.</action>
|
||||
<action issue="CSV-238" type="fix" dev="ggregory" due-to="Stephen Olander-Waters">Escape quotes in CLOBs #39.</action>
|
||||
<action issue="CSV-239" type="add" dev="ggregory" due-to="Gary Gregory, Dave Moten">Cannot get headers in column order from CSVRecord.</action>
|
||||
<action type="update" dev="ggregory" due-to="Gary Gregory">Update tests from H2 1.4.198 to 1.4.199.</action>
|
||||
</release>
|
||||
<release version="1.6" date="2018-09-22" description="Feature and bug fix release (Java 7)">
|
||||
|
|
|
@ -39,6 +39,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Parses CSV files according to the specified format.
|
||||
|
@ -133,6 +134,61 @@ import java.util.TreeMap;
|
|||
*/
|
||||
public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
||||
|
||||
class CSVRecordIterator implements Iterator<CSVRecord> {
|
||||
private CSVRecord current;
|
||||
|
||||
private CSVRecord getNextRecord() {
|
||||
try {
|
||||
return CSVParser.this.nextRecord();
|
||||
} catch (final IOException e) {
|
||||
throw new IllegalStateException(
|
||||
e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (CSVParser.this.isClosed()) {
|
||||
return false;
|
||||
}
|
||||
if (this.current == null) {
|
||||
this.current = this.getNextRecord();
|
||||
}
|
||||
|
||||
return this.current != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CSVRecord next() {
|
||||
if (CSVParser.this.isClosed()) {
|
||||
throw new NoSuchElementException("CSVParser has been closed");
|
||||
}
|
||||
CSVRecord next = this.current;
|
||||
this.current = null;
|
||||
|
||||
if (next == null) {
|
||||
// hasNext() wasn't called before
|
||||
next = this.getNextRecord();
|
||||
if (next == null) {
|
||||
throw new NoSuchElementException("No more CSV records available");
|
||||
}
|
||||
}
|
||||
|
||||
return next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
static List<String> createHeaderNames(final Map<String, Integer> headerMap) {
|
||||
return headerMap == null ? null
|
||||
: headerMap.entrySet().stream().sorted(Map.Entry.comparingByValue()).map(Map.Entry::getKey)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a parser for the given {@link File}.
|
||||
*
|
||||
|
@ -229,6 +285,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
return new CSVParser(reader, format);
|
||||
}
|
||||
|
||||
// the following objects are shared to reduce garbage
|
||||
|
||||
/**
|
||||
* Creates a parser for the given {@link String}.
|
||||
*
|
||||
|
@ -277,13 +335,14 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
|
||||
}
|
||||
|
||||
// the following objects are shared to reduce garbage
|
||||
|
||||
private final CSVFormat format;
|
||||
|
||||
/** A mapping of column names to column indices */
|
||||
private final Map<String, Integer> headerMap;
|
||||
|
||||
/** Preserve the column order to avoid re-computing it. */
|
||||
private final List<String> headerNames;
|
||||
|
||||
private final Lexer lexer;
|
||||
|
||||
private final CSVRecordIterator csvRecordIterator;
|
||||
|
@ -349,14 +408,15 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
*/
|
||||
@SuppressWarnings("resource")
|
||||
public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
|
||||
throws IOException {
|
||||
throws IOException {
|
||||
Assertions.notNull(reader, "reader");
|
||||
Assertions.notNull(format, "format");
|
||||
|
||||
this.format = format;
|
||||
this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
|
||||
this.csvRecordIterator = new CSVRecordIterator();
|
||||
this.headerMap = this.createHeaderMap();
|
||||
this.headerMap = createHeaderMap(); // 1st
|
||||
this.headerNames = createHeaderNames(this.headerMap); // 2nd
|
||||
this.characterOffset = characterOffset;
|
||||
this.recordNumber = recordNumber - 1;
|
||||
}
|
||||
|
@ -384,76 +444,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current line number in the input stream.
|
||||
*
|
||||
* <p>
|
||||
* <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
|
||||
* the record number.
|
||||
* </p>
|
||||
*
|
||||
* @return current line number
|
||||
*/
|
||||
public long getCurrentLineNumber() {
|
||||
return this.lexer.getCurrentLineNumber();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the first end-of-line string encountered.
|
||||
*
|
||||
* @return the first end-of-line string
|
||||
* @since 1.5
|
||||
*/
|
||||
public String getFirstEndOfLine() {
|
||||
return lexer.getFirstEol();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of the header map that iterates in column order.
|
||||
* <p>
|
||||
* The map keys are column names. The map values are 0-based indices.
|
||||
* </p>
|
||||
* @return a copy of the header map that iterates in column order.
|
||||
*/
|
||||
public Map<String, Integer> getHeaderMap() {
|
||||
return this.headerMap == null ? null : new LinkedHashMap<>(this.headerMap);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current record number in the input stream.
|
||||
*
|
||||
* <p>
|
||||
* <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
|
||||
* the line number.
|
||||
* </p>
|
||||
*
|
||||
* @return current record number
|
||||
*/
|
||||
public long getRecordNumber() {
|
||||
return this.recordNumber;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the CSV input according to the given format and returns the content as a list of
|
||||
* {@link CSVRecord CSVRecords}.
|
||||
*
|
||||
* <p>
|
||||
* The returned content starts at the current parse-position in the stream.
|
||||
* </p>
|
||||
*
|
||||
* @return list of {@link CSVRecord CSVRecords}, may be empty
|
||||
* @throws IOException
|
||||
* on parse error or input read-failure
|
||||
*/
|
||||
public List<CSVRecord> getRecords() throws IOException {
|
||||
CSVRecord rec;
|
||||
final List<CSVRecord> records = new ArrayList<>();
|
||||
while ((rec = this.nextRecord()) != null) {
|
||||
records.add(rec);
|
||||
}
|
||||
return records;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the name to index mapping if the format defines a header.
|
||||
*
|
||||
|
@ -501,6 +491,76 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
return hdrMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current line number in the input stream.
|
||||
*
|
||||
* <p>
|
||||
* <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
|
||||
* the record number.
|
||||
* </p>
|
||||
*
|
||||
* @return current line number
|
||||
*/
|
||||
public long getCurrentLineNumber() {
|
||||
return this.lexer.getCurrentLineNumber();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the first end-of-line string encountered.
|
||||
*
|
||||
* @return the first end-of-line string
|
||||
* @since 1.5
|
||||
*/
|
||||
public String getFirstEndOfLine() {
|
||||
return lexer.getFirstEol();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of the header map.
|
||||
* <p>
|
||||
* The map keys are column names. The map values are 0-based indices.
|
||||
* </p>
|
||||
* @return a copy of the header map.
|
||||
*/
|
||||
public Map<String, Integer> getHeaderMap() {
|
||||
return this.headerMap == null ? null : new LinkedHashMap<>(this.headerMap);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current record number in the input stream.
|
||||
*
|
||||
* <p>
|
||||
* <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
|
||||
* the line number.
|
||||
* </p>
|
||||
*
|
||||
* @return current record number
|
||||
*/
|
||||
public long getRecordNumber() {
|
||||
return this.recordNumber;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the CSV input according to the given format and returns the content as a list of
|
||||
* {@link CSVRecord CSVRecords}.
|
||||
*
|
||||
* <p>
|
||||
* The returned content starts at the current parse-position in the stream.
|
||||
* </p>
|
||||
*
|
||||
* @return list of {@link CSVRecord CSVRecords}, may be empty
|
||||
* @throws IOException
|
||||
* on parse error or input read-failure
|
||||
*/
|
||||
public List<CSVRecord> getRecords() throws IOException {
|
||||
CSVRecord rec;
|
||||
final List<CSVRecord> records = new ArrayList<>();
|
||||
while ((rec = this.nextRecord()) != null) {
|
||||
records.add(rec);
|
||||
}
|
||||
return records;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets whether this parser is closed.
|
||||
*
|
||||
|
@ -527,55 +587,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
return csvRecordIterator;
|
||||
}
|
||||
|
||||
class CSVRecordIterator implements Iterator<CSVRecord> {
|
||||
private CSVRecord current;
|
||||
|
||||
private CSVRecord getNextRecord() {
|
||||
try {
|
||||
return CSVParser.this.nextRecord();
|
||||
} catch (final IOException e) {
|
||||
throw new IllegalStateException(
|
||||
e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (CSVParser.this.isClosed()) {
|
||||
return false;
|
||||
}
|
||||
if (this.current == null) {
|
||||
this.current = this.getNextRecord();
|
||||
}
|
||||
|
||||
return this.current != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CSVRecord next() {
|
||||
if (CSVParser.this.isClosed()) {
|
||||
throw new NoSuchElementException("CSVParser has been closed");
|
||||
}
|
||||
CSVRecord next = this.current;
|
||||
this.current = null;
|
||||
|
||||
if (next == null) {
|
||||
// hasNext() wasn't called before
|
||||
next = this.getNextRecord();
|
||||
if (next == null) {
|
||||
throw new NoSuchElementException("No more CSV records available");
|
||||
}
|
||||
}
|
||||
|
||||
return next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the next record from the current point in the stream.
|
||||
*
|
||||
|
@ -622,8 +633,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
if (!this.recordList.isEmpty()) {
|
||||
this.recordNumber++;
|
||||
final String comment = sb == null ? null : sb.toString();
|
||||
result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap, comment,
|
||||
this.recordNumber, startCharPosition);
|
||||
result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap,
|
||||
this.headerNames, comment, this.recordNumber, startCharPosition);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -18,9 +18,10 @@
|
|||
package org.apache.commons.csv;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
@ -40,7 +41,10 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
private final String comment;
|
||||
|
||||
/** The column name to index mapping. */
|
||||
private final Map<String, Integer> mapping;
|
||||
private final Map<String, Integer> headerMap;
|
||||
|
||||
/** The column order to avoid re-computing it. */
|
||||
private final List<String> headerNames;
|
||||
|
||||
/** The record number. */
|
||||
private final long recordNumber;
|
||||
|
@ -48,11 +52,12 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
/** The values of the record */
|
||||
private final String[] values;
|
||||
|
||||
CSVRecord(final String[] values, final Map<String, Integer> mapping, final String comment, final long recordNumber,
|
||||
final long characterPosition) {
|
||||
CSVRecord(final String[] values, final Map<String, Integer> headerMap, List<String> headerNames, final String comment,
|
||||
final long recordNumber, final long characterPosition) {
|
||||
this.recordNumber = recordNumber;
|
||||
this.values = values != null ? values : EMPTY_STRING_ARRAY;
|
||||
this.mapping = mapping;
|
||||
this.headerMap = headerMap;
|
||||
this.headerNames = headerNames;
|
||||
this.comment = comment;
|
||||
this.characterPosition = characterPosition;
|
||||
}
|
||||
|
@ -93,14 +98,10 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
* @see CSVFormat#withNullString(String)
|
||||
*/
|
||||
public String get(final String name) {
|
||||
if (mapping == null) {
|
||||
throw new IllegalStateException(
|
||||
"No header mapping was specified, the record values can't be accessed by name");
|
||||
}
|
||||
final Integer index = mapping.get(name);
|
||||
final Integer index = getIndex(name);
|
||||
if (index == null) {
|
||||
throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
|
||||
mapping.keySet()));
|
||||
headerMap.keySet()));
|
||||
}
|
||||
try {
|
||||
return values[index.intValue()];
|
||||
|
@ -133,6 +134,32 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
return comment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of the header names that iterates in column order.
|
||||
*
|
||||
* @return a copy of the header names that iterates in column order.
|
||||
* @since 1.7
|
||||
*/
|
||||
public List<String> getHeaderNames() {
|
||||
return new ArrayList<>(headerNames);
|
||||
}
|
||||
|
||||
Integer getIndex(final String name) {
|
||||
if (headerMap == null) {
|
||||
throw new IllegalStateException(
|
||||
"No header mapping was specified, the record values can't be accessed by name");
|
||||
}
|
||||
return headerMap.get(name);
|
||||
}
|
||||
|
||||
String getName(final int index) {
|
||||
if (headerMap == null) {
|
||||
throw new IllegalStateException(
|
||||
"No header mapping was specified, the record values can't be accessed by name");
|
||||
}
|
||||
return headerNames.get(index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of this record in the parsed CSV file.
|
||||
*
|
||||
|
@ -148,20 +175,6 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
return recordNumber;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether the record size matches the header size.
|
||||
*
|
||||
* <p>
|
||||
* Returns true if the sizes for this record match and false if not. Some programs can export files that fail this
|
||||
* test but still produce parsable files.
|
||||
* </p>
|
||||
*
|
||||
* @return true of this record is valid, false if not
|
||||
*/
|
||||
public boolean isConsistent() {
|
||||
return mapping == null || mapping.size() == values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether this record has a comment, false otherwise.
|
||||
* Note that comments are attached to the following record.
|
||||
|
@ -175,6 +188,20 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
return comment != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether the record size matches the header size.
|
||||
*
|
||||
* <p>
|
||||
* Returns true if the sizes for this record match and false if not. Some programs can export files that fail this
|
||||
* test but still produce parsable files.
|
||||
* </p>
|
||||
*
|
||||
* @return true of this record is valid, false if not
|
||||
*/
|
||||
public boolean isConsistent() {
|
||||
return headerMap == null || headerMap.size() == values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a given column is mapped, i.e. its name has been defined to the parser.
|
||||
*
|
||||
|
@ -183,7 +210,7 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
* @return whether a given column is mapped.
|
||||
*/
|
||||
public boolean isMapped(final String name) {
|
||||
return mapping != null && mapping.containsKey(name);
|
||||
return headerMap != null && headerMap.containsKey(name);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -194,7 +221,7 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
* @return whether a given columns is mapped and has a value
|
||||
*/
|
||||
public boolean isSet(final String name) {
|
||||
return isMapped(name) && mapping.get(name).intValue() < values.length;
|
||||
return isMapped(name) && headerMap.get(name).intValue() < values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -215,10 +242,10 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
* @return the given map.
|
||||
*/
|
||||
<M extends Map<String, String>> M putIn(final M map) {
|
||||
if (mapping == null) {
|
||||
if (headerMap == null) {
|
||||
return map;
|
||||
}
|
||||
for (final Entry<String, Integer> entry : mapping.entrySet()) {
|
||||
for (final Entry<String, Integer> entry : headerMap.entrySet()) {
|
||||
final int col = entry.getValue().intValue();
|
||||
if (col < values.length) {
|
||||
map.put(entry.getKey(), values[col]);
|
||||
|
@ -253,7 +280,7 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
* @return A new Map. The map is empty if the record has no headers.
|
||||
*/
|
||||
public Map<String, String> toMap() {
|
||||
return putIn(new HashMap<String, String>(values.length));
|
||||
return putIn(new LinkedHashMap<String, String>(values.length));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -264,7 +291,7 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return "CSVRecord [comment=" + comment + ", mapping=" + mapping +
|
||||
return "CSVRecord [comment=" + comment + ", mapping=" + headerMap +
|
||||
", recordNumber=" + recordNumber + ", values=" +
|
||||
Arrays.toString(values) + "]";
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
@ -40,17 +41,17 @@ public class CSVRecordTest {
|
|||
|
||||
private String[] values;
|
||||
private CSVRecord record, recordWithHeader;
|
||||
private Map<String, Integer> header;
|
||||
private Map<String, Integer> headerMap;
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
values = new String[] { "A", "B", "C" };
|
||||
record = new CSVRecord(values, null, null, 0, -1);
|
||||
header = new HashMap<>();
|
||||
header.put("first", Integer.valueOf(0));
|
||||
header.put("second", Integer.valueOf(1));
|
||||
header.put("third", Integer.valueOf(2));
|
||||
recordWithHeader = new CSVRecord(values, header, null, 0, -1);
|
||||
record = new CSVRecord(values, null, null, null, 0, -1);
|
||||
headerMap = new HashMap<>();
|
||||
headerMap.put("first", Integer.valueOf(0));
|
||||
headerMap.put("second", Integer.valueOf(1));
|
||||
headerMap.put("third", Integer.valueOf(2));
|
||||
recordWithHeader = new CSVRecord(values, headerMap, CSVParser.createHeaderNames(headerMap), null, 0, -1);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -69,7 +70,7 @@ public class CSVRecordTest {
|
|||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testGetStringInconsistentRecord() {
|
||||
header.put("fourth", Integer.valueOf(4));
|
||||
headerMap.put("fourth", Integer.valueOf(4));
|
||||
recordWithHeader.get("fourth");
|
||||
}
|
||||
|
||||
|
@ -103,7 +104,7 @@ public class CSVRecordTest {
|
|||
assertTrue(record.isConsistent());
|
||||
assertTrue(recordWithHeader.isConsistent());
|
||||
|
||||
header.put("fourth", Integer.valueOf(4));
|
||||
headerMap.put("fourth", Integer.valueOf(4));
|
||||
assertFalse(recordWithHeader.isConsistent());
|
||||
}
|
||||
|
||||
|
@ -161,6 +162,18 @@ public class CSVRecordTest {
|
|||
this.validateMap(map, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetHeaderNames() {
|
||||
final Map<String, String> nameValueMap = this.recordWithHeader.toMap();
|
||||
final List<String> headerNames = this.recordWithHeader.getHeaderNames();
|
||||
Assert.assertEquals(nameValueMap.size(), headerNames.size());
|
||||
for (int i = 0; i < headerNames.size(); i++) {
|
||||
String name = headerNames.get(i);
|
||||
Assert.assertEquals(i, this.recordWithHeader.getIndex(name).intValue());
|
||||
Assert.assertEquals(name, this.recordWithHeader.getName(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToMapWithShortRecord() throws Exception {
|
||||
try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
|
||||
|
|
Loading…
Reference in New Issue