diff --git a/src/changes/changes.xml b/src/changes/changes.xml index ce721ef8..2fa93696 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,6 +45,7 @@ Add support for java.sql.Clob. Update to Java 8. Escape quotes in CLOBs #39. + Cannot get headers in column order from CSVRecord. Update tests from H2 1.4.198 to 1.4.199. diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index a3decdff..6eb97f81 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -39,6 +39,7 @@ import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.TreeMap; +import java.util.stream.Collectors; /** * Parses CSV files according to the specified format. @@ -133,6 +134,61 @@ import java.util.TreeMap; */ public final class CSVParser implements Iterable, Closeable { + class CSVRecordIterator implements Iterator { + private CSVRecord current; + + private CSVRecord getNextRecord() { + try { + return CSVParser.this.nextRecord(); + } catch (final IOException e) { + throw new IllegalStateException( + e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); + } + } + + @Override + public boolean hasNext() { + if (CSVParser.this.isClosed()) { + return false; + } + if (this.current == null) { + this.current = this.getNextRecord(); + } + + return this.current != null; + } + + @Override + public CSVRecord next() { + if (CSVParser.this.isClosed()) { + throw new NoSuchElementException("CSVParser has been closed"); + } + CSVRecord next = this.current; + this.current = null; + + if (next == null) { + // hasNext() wasn't called before + next = this.getNextRecord(); + if (next == null) { + throw new NoSuchElementException("No more CSV records available"); + } + } + + return next; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + static List createHeaderNames(final Map headerMap) { + return headerMap == null ? null + : headerMap.entrySet().stream().sorted(Map.Entry.comparingByValue()).map(Map.Entry::getKey) + .collect(Collectors.toList()); + } + /** * Creates a parser for the given {@link File}. * @@ -229,6 +285,8 @@ public final class CSVParser implements Iterable, Closeable { return new CSVParser(reader, format); } + // the following objects are shared to reduce garbage + /** * Creates a parser for the given {@link String}. * @@ -277,13 +335,14 @@ public final class CSVParser implements Iterable, Closeable { return new CSVParser(new InputStreamReader(url.openStream(), charset), format); } - // the following objects are shared to reduce garbage - private final CSVFormat format; /** A mapping of column names to column indices */ private final Map headerMap; + /** Preserve the column order to avoid re-computing it. */ + private final List headerNames; + private final Lexer lexer; private final CSVRecordIterator csvRecordIterator; @@ -349,14 +408,15 @@ public final class CSVParser implements Iterable, Closeable { */ @SuppressWarnings("resource") public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) - throws IOException { + throws IOException { Assertions.notNull(reader, "reader"); Assertions.notNull(format, "format"); this.format = format; this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); this.csvRecordIterator = new CSVRecordIterator(); - this.headerMap = this.createHeaderMap(); + this.headerMap = createHeaderMap(); // 1st + this.headerNames = createHeaderNames(this.headerMap); // 2nd this.characterOffset = characterOffset; this.recordNumber = recordNumber - 1; } @@ -384,76 +444,6 @@ public final class CSVParser implements Iterable, Closeable { } } - /** - * Returns the current line number in the input stream. - * - *

- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to - * the record number. - *

- * - * @return current line number - */ - public long getCurrentLineNumber() { - return this.lexer.getCurrentLineNumber(); - } - - /** - * Gets the first end-of-line string encountered. - * - * @return the first end-of-line string - * @since 1.5 - */ - public String getFirstEndOfLine() { - return lexer.getFirstEol(); - } - - /** - * Returns a copy of the header map that iterates in column order. - *

- * The map keys are column names. The map values are 0-based indices. - *

- * @return a copy of the header map that iterates in column order. - */ - public Map getHeaderMap() { - return this.headerMap == null ? null : new LinkedHashMap<>(this.headerMap); - } - - /** - * Returns the current record number in the input stream. - * - *

- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to - * the line number. - *

- * - * @return current record number - */ - public long getRecordNumber() { - return this.recordNumber; - } - - /** - * Parses the CSV input according to the given format and returns the content as a list of - * {@link CSVRecord CSVRecords}. - * - *

- * The returned content starts at the current parse-position in the stream. - *

- * - * @return list of {@link CSVRecord CSVRecords}, may be empty - * @throws IOException - * on parse error or input read-failure - */ - public List getRecords() throws IOException { - CSVRecord rec; - final List records = new ArrayList<>(); - while ((rec = this.nextRecord()) != null) { - records.add(rec); - } - return records; - } - /** * Creates the name to index mapping if the format defines a header. * @@ -501,6 +491,76 @@ public final class CSVParser implements Iterable, Closeable { return hdrMap; } + /** + * Returns the current line number in the input stream. + * + *

+ * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to + * the record number. + *

+ * + * @return current line number + */ + public long getCurrentLineNumber() { + return this.lexer.getCurrentLineNumber(); + } + + /** + * Gets the first end-of-line string encountered. + * + * @return the first end-of-line string + * @since 1.5 + */ + public String getFirstEndOfLine() { + return lexer.getFirstEol(); + } + + /** + * Returns a copy of the header map. + *

+ * The map keys are column names. The map values are 0-based indices. + *

+ * @return a copy of the header map. + */ + public Map getHeaderMap() { + return this.headerMap == null ? null : new LinkedHashMap<>(this.headerMap); + } + + /** + * Returns the current record number in the input stream. + * + *

+ * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to + * the line number. + *

+ * + * @return current record number + */ + public long getRecordNumber() { + return this.recordNumber; + } + + /** + * Parses the CSV input according to the given format and returns the content as a list of + * {@link CSVRecord CSVRecords}. + * + *

+ * The returned content starts at the current parse-position in the stream. + *

+ * + * @return list of {@link CSVRecord CSVRecords}, may be empty + * @throws IOException + * on parse error or input read-failure + */ + public List getRecords() throws IOException { + CSVRecord rec; + final List records = new ArrayList<>(); + while ((rec = this.nextRecord()) != null) { + records.add(rec); + } + return records; + } + /** * Gets whether this parser is closed. * @@ -527,55 +587,6 @@ public final class CSVParser implements Iterable, Closeable { return csvRecordIterator; } - class CSVRecordIterator implements Iterator { - private CSVRecord current; - - private CSVRecord getNextRecord() { - try { - return CSVParser.this.nextRecord(); - } catch (final IOException e) { - throw new IllegalStateException( - e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); - } - } - - @Override - public boolean hasNext() { - if (CSVParser.this.isClosed()) { - return false; - } - if (this.current == null) { - this.current = this.getNextRecord(); - } - - return this.current != null; - } - - @Override - public CSVRecord next() { - if (CSVParser.this.isClosed()) { - throw new NoSuchElementException("CSVParser has been closed"); - } - CSVRecord next = this.current; - this.current = null; - - if (next == null) { - // hasNext() wasn't called before - next = this.getNextRecord(); - if (next == null) { - throw new NoSuchElementException("No more CSV records available"); - } - } - - return next; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - /** * Parses the next record from the current point in the stream. * @@ -622,8 +633,8 @@ public final class CSVParser implements Iterable, Closeable { if (!this.recordList.isEmpty()) { this.recordNumber++; final String comment = sb == null ? null : sb.toString(); - result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap, comment, - this.recordNumber, startCharPosition); + result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap, + this.headerNames, comment, this.recordNumber, startCharPosition); } return result; } diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index 34a3ba21..39844058 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -18,9 +18,10 @@ package org.apache.commons.csv; import java.io.Serializable; +import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -40,7 +41,10 @@ public final class CSVRecord implements Serializable, Iterable { private final String comment; /** The column name to index mapping. */ - private final Map mapping; + private final Map headerMap; + + /** The column order to avoid re-computing it. */ + private final List headerNames; /** The record number. */ private final long recordNumber; @@ -48,11 +52,12 @@ public final class CSVRecord implements Serializable, Iterable { /** The values of the record */ private final String[] values; - CSVRecord(final String[] values, final Map mapping, final String comment, final long recordNumber, - final long characterPosition) { + CSVRecord(final String[] values, final Map headerMap, List headerNames, final String comment, + final long recordNumber, final long characterPosition) { this.recordNumber = recordNumber; this.values = values != null ? values : EMPTY_STRING_ARRAY; - this.mapping = mapping; + this.headerMap = headerMap; + this.headerNames = headerNames; this.comment = comment; this.characterPosition = characterPosition; } @@ -93,14 +98,10 @@ public final class CSVRecord implements Serializable, Iterable { * @see CSVFormat#withNullString(String) */ public String get(final String name) { - if (mapping == null) { - throw new IllegalStateException( - "No header mapping was specified, the record values can't be accessed by name"); - } - final Integer index = mapping.get(name); + final Integer index = getIndex(name); if (index == null) { throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, - mapping.keySet())); + headerMap.keySet())); } try { return values[index.intValue()]; @@ -133,6 +134,32 @@ public final class CSVRecord implements Serializable, Iterable { return comment; } + /** + * Returns a copy of the header names that iterates in column order. + * + * @return a copy of the header names that iterates in column order. + * @since 1.7 + */ + public List getHeaderNames() { + return new ArrayList<>(headerNames); + } + + Integer getIndex(final String name) { + if (headerMap == null) { + throw new IllegalStateException( + "No header mapping was specified, the record values can't be accessed by name"); + } + return headerMap.get(name); + } + + String getName(final int index) { + if (headerMap == null) { + throw new IllegalStateException( + "No header mapping was specified, the record values can't be accessed by name"); + } + return headerNames.get(index); + } + /** * Returns the number of this record in the parsed CSV file. * @@ -148,20 +175,6 @@ public final class CSVRecord implements Serializable, Iterable { return recordNumber; } - /** - * Tells whether the record size matches the header size. - * - *

- * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this - * test but still produce parsable files. - *

- * - * @return true of this record is valid, false if not - */ - public boolean isConsistent() { - return mapping == null || mapping.size() == values.length; - } - /** * Checks whether this record has a comment, false otherwise. * Note that comments are attached to the following record. @@ -175,6 +188,20 @@ public final class CSVRecord implements Serializable, Iterable { return comment != null; } + /** + * Tells whether the record size matches the header size. + * + *

+ * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this + * test but still produce parsable files. + *

+ * + * @return true of this record is valid, false if not + */ + public boolean isConsistent() { + return headerMap == null || headerMap.size() == values.length; + } + /** * Checks whether a given column is mapped, i.e. its name has been defined to the parser. * @@ -183,7 +210,7 @@ public final class CSVRecord implements Serializable, Iterable { * @return whether a given column is mapped. */ public boolean isMapped(final String name) { - return mapping != null && mapping.containsKey(name); + return headerMap != null && headerMap.containsKey(name); } /** @@ -194,7 +221,7 @@ public final class CSVRecord implements Serializable, Iterable { * @return whether a given columns is mapped and has a value */ public boolean isSet(final String name) { - return isMapped(name) && mapping.get(name).intValue() < values.length; + return isMapped(name) && headerMap.get(name).intValue() < values.length; } /** @@ -215,10 +242,10 @@ public final class CSVRecord implements Serializable, Iterable { * @return the given map. */ > M putIn(final M map) { - if (mapping == null) { + if (headerMap == null) { return map; } - for (final Entry entry : mapping.entrySet()) { + for (final Entry entry : headerMap.entrySet()) { final int col = entry.getValue().intValue(); if (col < values.length) { map.put(entry.getKey(), values[col]); @@ -253,7 +280,7 @@ public final class CSVRecord implements Serializable, Iterable { * @return A new Map. The map is empty if the record has no headers. */ public Map toMap() { - return putIn(new HashMap(values.length)); + return putIn(new LinkedHashMap(values.length)); } /** @@ -264,7 +291,7 @@ public final class CSVRecord implements Serializable, Iterable { */ @Override public String toString() { - return "CSVRecord [comment=" + comment + ", mapping=" + mapping + + return "CSVRecord [comment=" + comment + ", mapping=" + headerMap + ", recordNumber=" + recordNumber + ", values=" + Arrays.toString(values) + "]"; } diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java index 6347cc51..dab4d550 100644 --- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java +++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; @@ -40,17 +41,17 @@ public class CSVRecordTest { private String[] values; private CSVRecord record, recordWithHeader; - private Map header; + private Map headerMap; @Before public void setUp() throws Exception { values = new String[] { "A", "B", "C" }; - record = new CSVRecord(values, null, null, 0, -1); - header = new HashMap<>(); - header.put("first", Integer.valueOf(0)); - header.put("second", Integer.valueOf(1)); - header.put("third", Integer.valueOf(2)); - recordWithHeader = new CSVRecord(values, header, null, 0, -1); + record = new CSVRecord(values, null, null, null, 0, -1); + headerMap = new HashMap<>(); + headerMap.put("first", Integer.valueOf(0)); + headerMap.put("second", Integer.valueOf(1)); + headerMap.put("third", Integer.valueOf(2)); + recordWithHeader = new CSVRecord(values, headerMap, CSVParser.createHeaderNames(headerMap), null, 0, -1); } @Test @@ -69,7 +70,7 @@ public class CSVRecordTest { @Test(expected = IllegalArgumentException.class) public void testGetStringInconsistentRecord() { - header.put("fourth", Integer.valueOf(4)); + headerMap.put("fourth", Integer.valueOf(4)); recordWithHeader.get("fourth"); } @@ -103,7 +104,7 @@ public class CSVRecordTest { assertTrue(record.isConsistent()); assertTrue(recordWithHeader.isConsistent()); - header.put("fourth", Integer.valueOf(4)); + headerMap.put("fourth", Integer.valueOf(4)); assertFalse(recordWithHeader.isConsistent()); } @@ -161,6 +162,18 @@ public class CSVRecordTest { this.validateMap(map, true); } + @Test + public void testGetHeaderNames() { + final Map nameValueMap = this.recordWithHeader.toMap(); + final List headerNames = this.recordWithHeader.getHeaderNames(); + Assert.assertEquals(nameValueMap.size(), headerNames.size()); + for (int i = 0; i < headerNames.size(); i++) { + String name = headerNames.get(i); + Assert.assertEquals(i, this.recordWithHeader.getIndex(name).intValue()); + Assert.assertEquals(name, this.recordWithHeader.getName(i)); + } + } + @Test public void testToMapWithShortRecord() throws Exception { try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {