[CSV-239] Cannot get headers in column order from CSVRecord.
- Redo so that the record tracks its source parser where call sites can find metadata. This avoids adding slots to the record class itself.
This commit is contained in:
parent
9dcad06c00
commit
ef51c1ecad
|
@ -340,7 +340,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
/** A mapping of column names to column indices */
|
||||
private final Map<String, Integer> headerMap;
|
||||
|
||||
/** Preserve the column order to avoid re-computing it. */
|
||||
/** The column order to avoid re-computing it. */
|
||||
private final List<String> headerNames;
|
||||
|
||||
private final Lexer lexer;
|
||||
|
@ -444,6 +444,12 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
private Map<String, Integer> createEmptyHeaderMap() {
|
||||
return this.format.getIgnoreHeaderCase() ?
|
||||
new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
|
||||
new TreeMap<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the name to index mapping if the format defines a header.
|
||||
*
|
||||
|
@ -454,10 +460,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
Map<String, Integer> hdrMap = null;
|
||||
final String[] formatHeader = this.format.getHeader();
|
||||
if (formatHeader != null) {
|
||||
hdrMap = this.format.getIgnoreHeaderCase() ?
|
||||
new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
|
||||
new TreeMap<>();
|
||||
|
||||
hdrMap = createEmptyHeaderMap();
|
||||
String[] headerRecord = null;
|
||||
if (formatHeader.length == 0) {
|
||||
// read the header from the first line of the file
|
||||
|
@ -523,7 +526,31 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
* @return a copy of the header map.
|
||||
*/
|
||||
public Map<String, Integer> getHeaderMap() {
|
||||
return this.headerMap == null ? null : new LinkedHashMap<>(this.headerMap);
|
||||
if (this.headerMap == null) {
|
||||
return null;
|
||||
}
|
||||
final Map<String, Integer> map = createEmptyHeaderMap();
|
||||
map.putAll(this.headerMap);
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the header map.
|
||||
*
|
||||
* @return the header map.
|
||||
*/
|
||||
Map<String, Integer> getHeaderMapRaw() {
|
||||
return this.headerMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of the header names that iterates in column order.
|
||||
*
|
||||
* @return a copy of the header names that iterates in column order.
|
||||
* @since 1.7
|
||||
*/
|
||||
public List<String> getHeaderNames() {
|
||||
return new ArrayList<>(headerNames);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -633,8 +660,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
if (!this.recordList.isEmpty()) {
|
||||
this.recordNumber++;
|
||||
final String comment = sb == null ? null : sb.toString();
|
||||
result = new CSVRecord(this.recordList.toArray(new String[this.recordList.size()]), this.headerMap,
|
||||
this.headerNames, comment, this.recordNumber, startCharPosition);
|
||||
result = new CSVRecord(this, this.recordList.toArray(new String[this.recordList.size()]),
|
||||
comment, this.recordNumber, startCharPosition);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
package org.apache.commons.csv;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
|
@ -40,24 +39,20 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
/** The accumulated comments (if any) */
|
||||
private final String comment;
|
||||
|
||||
/** The column name to index mapping. */
|
||||
private final Map<String, Integer> headerMap;
|
||||
|
||||
/** The column order to avoid re-computing it. */
|
||||
private final List<String> headerNames;
|
||||
|
||||
/** The record number. */
|
||||
private final long recordNumber;
|
||||
|
||||
/** The values of the record */
|
||||
private final String[] values;
|
||||
|
||||
CSVRecord(final String[] values, final Map<String, Integer> headerMap, List<String> headerNames, final String comment,
|
||||
final long recordNumber, final long characterPosition) {
|
||||
/** The parser that originates this record. */
|
||||
private final CSVParser parser;
|
||||
|
||||
CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber,
|
||||
final long characterPosition) {
|
||||
this.recordNumber = recordNumber;
|
||||
this.values = values != null ? values : EMPTY_STRING_ARRAY;
|
||||
this.headerMap = headerMap;
|
||||
this.headerNames = headerNames;
|
||||
this.parser = parser;
|
||||
this.comment = comment;
|
||||
this.characterPosition = characterPosition;
|
||||
}
|
||||
|
@ -84,6 +79,10 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
return values[i];
|
||||
}
|
||||
|
||||
private Map<String, Integer> getHeaderMapRaw() {
|
||||
return parser.getHeaderMapRaw();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a value by name.
|
||||
*
|
||||
|
@ -98,7 +97,12 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
* @see CSVFormat#withNullString(String)
|
||||
*/
|
||||
public String get(final String name) {
|
||||
final Integer index = getIndex(name);
|
||||
final Map<String, Integer> headerMap = getHeaderMapRaw();
|
||||
if (headerMap == null) {
|
||||
throw new IllegalStateException(
|
||||
"No header mapping was specified, the record values can't be accessed by name");
|
||||
}
|
||||
final Integer index = headerMap.get(name);
|
||||
if (index == null) {
|
||||
throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
|
||||
headerMap.keySet()));
|
||||
|
@ -135,29 +139,13 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of the header names that iterates in column order.
|
||||
*
|
||||
* @return a copy of the header names that iterates in column order.
|
||||
* Returns the parser.
|
||||
*
|
||||
* @return the parser.
|
||||
* @since 1.7
|
||||
*/
|
||||
public List<String> getHeaderNames() {
|
||||
return new ArrayList<>(headerNames);
|
||||
}
|
||||
|
||||
Integer getIndex(final String name) {
|
||||
if (headerMap == null) {
|
||||
throw new IllegalStateException(
|
||||
"No header mapping was specified, the record values can't be accessed by name");
|
||||
}
|
||||
return headerMap.get(name);
|
||||
}
|
||||
|
||||
String getName(final int index) {
|
||||
if (headerMap == null) {
|
||||
throw new IllegalStateException(
|
||||
"No header mapping was specified, the record values can't be accessed by name");
|
||||
}
|
||||
return headerNames.get(index);
|
||||
public CSVParser getCSVParser() {
|
||||
return parser;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -199,6 +187,7 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
* @return true of this record is valid, false if not
|
||||
*/
|
||||
public boolean isConsistent() {
|
||||
final Map<String, Integer> headerMap = getHeaderMapRaw();
|
||||
return headerMap == null || headerMap.size() == values.length;
|
||||
}
|
||||
|
||||
|
@ -210,6 +199,7 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
* @return whether a given column is mapped.
|
||||
*/
|
||||
public boolean isMapped(final String name) {
|
||||
final Map<String, Integer> headerMap = getHeaderMapRaw();
|
||||
return headerMap != null && headerMap.containsKey(name);
|
||||
}
|
||||
|
||||
|
@ -221,7 +211,7 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
* @return whether a given columns is mapped and has a value
|
||||
*/
|
||||
public boolean isSet(final String name) {
|
||||
return isMapped(name) && headerMap.get(name).intValue() < values.length;
|
||||
return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -242,10 +232,10 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
* @return the given map.
|
||||
*/
|
||||
<M extends Map<String, String>> M putIn(final M map) {
|
||||
if (headerMap == null) {
|
||||
if (getHeaderMapRaw() == null) {
|
||||
return map;
|
||||
}
|
||||
for (final Entry<String, Integer> entry : headerMap.entrySet()) {
|
||||
for (final Entry<String, Integer> entry : getHeaderMapRaw().entrySet()) {
|
||||
final int col = entry.getValue().intValue();
|
||||
if (col < values.length) {
|
||||
map.put(entry.getKey(), values[col]);
|
||||
|
@ -291,9 +281,8 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return "CSVRecord [comment=" + comment + ", mapping=" + headerMap +
|
||||
", recordNumber=" + recordNumber + ", values=" +
|
||||
Arrays.toString(values) + "]";
|
||||
return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values="
|
||||
+ Arrays.toString(values) + "]";
|
||||
}
|
||||
|
||||
String[] values() {
|
||||
|
|
|
@ -490,6 +490,20 @@ public class CSVParserTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetHeaderNames() throws IOException {
|
||||
try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z",
|
||||
CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
|
||||
final Map<String, Integer> nameIndexMap = parser.getHeaderMap();
|
||||
final List<String> headerNames = parser.getHeaderNames();
|
||||
Assert.assertEquals(nameIndexMap.size(), headerNames.size());
|
||||
for (int i = 0; i < headerNames.size(); i++) {
|
||||
final String name = headerNames.get(i);
|
||||
Assert.assertEquals(i, nameIndexMap.get(name).intValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetLine() throws IOException {
|
||||
try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
|
||||
|
@ -678,9 +692,9 @@ public class CSVParserTest {
|
|||
|
||||
@Test
|
||||
public void testIgnoreCaseHeaderMapping() throws Exception {
|
||||
final Reader in = new StringReader("1,2,3");
|
||||
final Reader reader = new StringReader("1,2,3");
|
||||
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("One", "TWO", "three").withIgnoreHeaderCase()
|
||||
.parse(in).iterator();
|
||||
.parse(reader).iterator();
|
||||
final CSVRecord record = records.next();
|
||||
assertEquals("1", record.get("one"));
|
||||
assertEquals("2", record.get("two"));
|
||||
|
@ -742,10 +756,10 @@ public class CSVParserTest {
|
|||
// Iterator hasNext() shouldn't break sequence
|
||||
CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows));
|
||||
int recordNumber = 0;
|
||||
Iterator<CSVRecord> iter = parser.iterator();
|
||||
final Iterator<CSVRecord> iter = parser.iterator();
|
||||
recordNumber = 0;
|
||||
while (iter.hasNext()) {
|
||||
CSVRecord record = iter.next();
|
||||
final CSVRecord record = iter.next();
|
||||
recordNumber++;
|
||||
assertEquals(String.valueOf(recordNumber), record.get(0));
|
||||
if (recordNumber >= 2) {
|
||||
|
@ -754,7 +768,7 @@ public class CSVParserTest {
|
|||
}
|
||||
iter.hasNext();
|
||||
while (iter.hasNext()) {
|
||||
CSVRecord record = iter.next();
|
||||
final CSVRecord record = iter.next();
|
||||
recordNumber++;
|
||||
assertEquals(String.valueOf(recordNumber), record.get(0));
|
||||
}
|
||||
|
@ -762,14 +776,14 @@ public class CSVParserTest {
|
|||
// Consecutive enhanced for loops shouldn't break sequence
|
||||
parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows));
|
||||
recordNumber = 0;
|
||||
for (CSVRecord record : parser) {
|
||||
for (final CSVRecord record : parser) {
|
||||
recordNumber++;
|
||||
assertEquals(String.valueOf(recordNumber), record.get(0));
|
||||
if (recordNumber >= 2) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (CSVRecord record : parser) {
|
||||
for (final CSVRecord record : parser) {
|
||||
recordNumber++;
|
||||
assertEquals(String.valueOf(recordNumber), record.get(0));
|
||||
}
|
||||
|
@ -777,7 +791,7 @@ public class CSVParserTest {
|
|||
// Consecutive enhanced for loops with hasNext() peeking shouldn't break sequence
|
||||
parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows));
|
||||
recordNumber = 0;
|
||||
for (CSVRecord record : parser) {
|
||||
for (final CSVRecord record : parser) {
|
||||
recordNumber++;
|
||||
assertEquals(String.valueOf(recordNumber), record.get(0));
|
||||
if (recordNumber >= 2) {
|
||||
|
@ -785,7 +799,7 @@ public class CSVParserTest {
|
|||
}
|
||||
}
|
||||
parser.iterator().hasNext();
|
||||
for (CSVRecord record : parser) {
|
||||
for (final CSVRecord record : parser) {
|
||||
recordNumber++;
|
||||
assertEquals(String.valueOf(recordNumber), record.get(0));
|
||||
}
|
||||
|
@ -799,7 +813,7 @@ public class CSVParserTest {
|
|||
assertEquals(4, records.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception {
|
||||
final Reader in = new StringReader("a,b,c\n1,2\nx,y,z");
|
||||
|
|
|
@ -82,7 +82,7 @@ public class CSVPrinterTest {
|
|||
}
|
||||
|
||||
private final String recordSeparator = CSVFormat.DEFAULT.getRecordSeparator();
|
||||
private String longText2;
|
||||
private String longText2;
|
||||
|
||||
private void doOneRandom(final CSVFormat format) throws Exception {
|
||||
final Random r = new Random();
|
||||
|
|
|
@ -23,21 +23,23 @@ import static org.junit.Assert.assertNull;
|
|||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
public class CSVRecordTest {
|
||||
|
||||
private enum EnumFixture { UNKNOWN_COLUMN }
|
||||
private enum EnumFixture {
|
||||
UNKNOWN_COLUMN
|
||||
}
|
||||
|
||||
private String[] values;
|
||||
private CSVRecord record, recordWithHeader;
|
||||
|
@ -46,12 +48,15 @@ public class CSVRecordTest {
|
|||
@Before
|
||||
public void setUp() throws Exception {
|
||||
values = new String[] { "A", "B", "C" };
|
||||
record = new CSVRecord(values, null, null, null, 0, -1);
|
||||
headerMap = new HashMap<>();
|
||||
headerMap.put("first", Integer.valueOf(0));
|
||||
headerMap.put("second", Integer.valueOf(1));
|
||||
headerMap.put("third", Integer.valueOf(2));
|
||||
recordWithHeader = new CSVRecord(values, headerMap, CSVParser.createHeaderNames(headerMap), null, 0, -1);
|
||||
final String rowData = StringUtils.join(values, ',');
|
||||
try (final CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(rowData))) {
|
||||
record = parser.iterator().next();
|
||||
}
|
||||
final String[] headers = { "first", "second", "third" };
|
||||
try (final CSVParser parser = CSVFormat.DEFAULT.withHeader(headers).parse(new StringReader(rowData))) {
|
||||
recordWithHeader = parser.iterator().next();
|
||||
headerMap = parser.getHeaderMap();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -103,9 +108,22 @@ public class CSVRecordTest {
|
|||
public void testIsConsistent() {
|
||||
assertTrue(record.isConsistent());
|
||||
assertTrue(recordWithHeader.isConsistent());
|
||||
final Map<String, Integer> map = recordWithHeader.getCSVParser().getHeaderMap();
|
||||
map.put("fourth", Integer.valueOf(4));
|
||||
// We are working on a copy of the map, so the record should still be OK.
|
||||
assertTrue(recordWithHeader.isConsistent());
|
||||
}
|
||||
|
||||
headerMap.put("fourth", Integer.valueOf(4));
|
||||
assertFalse(recordWithHeader.isConsistent());
|
||||
@Test
|
||||
public void testIsInconsistent() throws IOException {
|
||||
final String[] headers = { "first", "second", "third" };
|
||||
final String rowData = StringUtils.join(values, ',');
|
||||
try (final CSVParser parser = CSVFormat.DEFAULT.withHeader(headers).parse(new StringReader(rowData))) {
|
||||
final Map<String, Integer> map = parser.getHeaderMapRaw();
|
||||
final CSVRecord record1 = parser.iterator().next();
|
||||
map.put("fourth", Integer.valueOf(4));
|
||||
assertFalse(record1.isConsistent());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -162,18 +180,6 @@ public class CSVRecordTest {
|
|||
this.validateMap(map, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetHeaderNames() {
|
||||
final Map<String, String> nameValueMap = this.recordWithHeader.toMap();
|
||||
final List<String> headerNames = this.recordWithHeader.getHeaderNames();
|
||||
Assert.assertEquals(nameValueMap.size(), headerNames.size());
|
||||
for (int i = 0; i < headerNames.size(); i++) {
|
||||
String name = headerNames.get(i);
|
||||
Assert.assertEquals(i, this.recordWithHeader.getIndex(name).intValue());
|
||||
Assert.assertEquals(name, this.recordWithHeader.getName(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToMapWithShortRecord() throws Exception {
|
||||
try (final CSVParser parser = CSVParser.parse("a,b", CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
|
||||
|
|
|
@ -246,6 +246,7 @@ public class PerformanceTest {
|
|||
|
||||
private static void testParseCommonsCSV() throws Exception {
|
||||
testParser("CSV", new CSVParserFactory() {
|
||||
@Override
|
||||
public CSVParser createParser() throws IOException {
|
||||
return new CSVParser(createReader(), format);
|
||||
}
|
||||
|
@ -254,6 +255,7 @@ public class PerformanceTest {
|
|||
|
||||
private static void testParsePath() throws Exception {
|
||||
testParser("CSV-PATH", new CSVParserFactory() {
|
||||
@Override
|
||||
public CSVParser createParser() throws IOException {
|
||||
return CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format);
|
||||
}
|
||||
|
@ -262,6 +264,7 @@ public class PerformanceTest {
|
|||
|
||||
private static void testParsePathDoubleBuffering() throws Exception {
|
||||
testParser("CSV-PATH-DB", new CSVParserFactory() {
|
||||
@Override
|
||||
public CSVParser createParser() throws IOException {
|
||||
return CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format);
|
||||
}
|
||||
|
@ -270,6 +273,7 @@ public class PerformanceTest {
|
|||
|
||||
private static void testParseURL() throws Exception {
|
||||
testParser("CSV-URL", new CSVParserFactory() {
|
||||
@Override
|
||||
public CSVParser createParser() throws IOException {
|
||||
//NOTE: URL will always return a BufferedInputStream.
|
||||
return CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format);
|
||||
|
|
Loading…
Reference in New Issue