Revert back to NOT skipping a record when withHeader is called with a non-empty array. Add skipHeaderRecord setting to CSVFormat and use when headers are initialized.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1508933 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2013-07-31 15:39:57 +00:00
parent e6759b8c10
commit 97d34575a1
3 changed files with 54 additions and 26 deletions

View File

@ -53,6 +53,7 @@ import java.util.Set;
public class CSVFormat implements Serializable {
private static final long serialVersionUID = 1L;
/**
* Returns true if the given character is a line break character.
*
@ -76,6 +77,7 @@ public class CSVFormat implements Serializable {
private final String recordSeparator; // for outputs
private final String nullString;
private final String[] header;
private final boolean skipHeaderRecord;
/**
* Standard comma separated format, as for {@link #RFC4180} but allowing empty lines.
@ -90,7 +92,7 @@ public class CSVFormat implements Serializable {
* <li>withIgnoreEmptyLines(true)</li>
* </ul>
*/
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null);
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, false);
/**
* Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
@ -166,7 +168,7 @@ public class CSVFormat implements Serializable {
* @throws IllegalArgumentException if the delimiter is a line break character
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null);
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, false);
}
/**
@ -203,6 +205,7 @@ public class CSVFormat implements Serializable {
* the line separator to use for output
* @param header
* the header
* @param skipHeaderRecord
* whether to skip the header record
* @throws IllegalArgumentException if the delimiter is a line break character
*/
// package protected to give access without needing a synthetic accessor
@ -210,7 +213,7 @@ public class CSVFormat implements Serializable {
final Quote quotePolicy, final Character commentStart,
final Character escape, final boolean ignoreSurroundingSpaces,
final boolean ignoreEmptyLines, final String recordSeparator,
final String nullString, final String[] header) {
final String nullString, final String[] header, boolean skipHeaderRecord) {
if (isLineBreak(delimiter)) {
throw new IllegalArgumentException("The delimiter cannot be a line break");
}
@ -224,12 +227,13 @@ public class CSVFormat implements Serializable {
this.recordSeparator = recordSeparator;
this.nullString = nullString;
this.header = header == null ? null : header.clone();
this.skipHeaderRecord = skipHeaderRecord;
}
CSVFormat(final CSVFormat format) {
this(format.getDelimiter(), format.getQuoteChar(), format.getQuotePolicy(), format.getCommentStart(),
format.getEscape(), format.getIgnoreSurroundingSpaces(), format.getIgnoreEmptyLines(),
format.getRecordSeparator(), format.getNullString(), format.getHeader());
format.getRecordSeparator(), format.getNullString(), format.getHeader(), format.getSkipHeaderRecord());
}
@Override
@ -409,6 +413,15 @@ public class CSVFormat implements Serializable {
return recordSeparator;
}
/**
* Returns whether to skip the header record.
*
* @return whether to skip the header record.
*/
public boolean getSkipHeaderRecord() {
    // Plain accessor for the flag stored by the constructor.
    return this.skipHeaderRecord;
}
@Override
public int hashCode()
{
@ -573,7 +586,7 @@ public class CSVFormat implements Serializable {
throw new IllegalArgumentException("The comment start character cannot be a line break");
}
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
}
/**
@ -590,7 +603,7 @@ public class CSVFormat implements Serializable {
throw new IllegalArgumentException("The delimiter cannot be a line break");
}
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
}
/**
@ -620,33 +633,29 @@ public class CSVFormat implements Serializable {
throw new IllegalArgumentException("The escape character cannot be a line break");
}
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
}
/**
* Sets the header of the format. The header can either be parsed automatically from the input file with:
*
* <pre>
* CSVFormat format = aformat.withHeader();
* </pre>
* CSVFormat format = aformat.withHeader();</pre>
*
* or specified manually with:
*
* <pre>
* CSVFormat format = aformat.withHeader(&quot;name&quot;, &quot;email&quot;, &quot;phone&quot;);
* </pre>
*
* When this option is is set to any non-null value, the first record is the first <em>data</em> record, not the
* header record.
* CSVFormat format = aformat.withHeader(&quot;name&quot;, &quot;email&quot;, &quot;phone&quot;);</pre>
*
* @param header
* the header, <tt>null</tt> if disabled, empty if parsed automatically, user specified otherwise.
*
* @return A new CSVFormat that is equal to this but with the specified header
* @see #withSkipHeaderRecord(boolean)
*/
public CSVFormat withHeader(final String... header) {
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
}
/**
@ -659,7 +668,7 @@ public class CSVFormat implements Serializable {
*/
public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
}
/**
@ -672,7 +681,7 @@ public class CSVFormat implements Serializable {
*/
public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
}
/**
@ -692,7 +701,7 @@ public class CSVFormat implements Serializable {
*/
public CSVFormat withNullString(final String nullString) {
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
}
/**
@ -722,7 +731,7 @@ public class CSVFormat implements Serializable {
throw new IllegalArgumentException("The quoteChar cannot be a line break");
}
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
}
/**
@ -735,7 +744,7 @@ public class CSVFormat implements Serializable {
*/
public CSVFormat withQuotePolicy(final Quote quotePolicy) {
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
}
/**
@ -760,6 +769,20 @@ public class CSVFormat implements Serializable {
*/
public CSVFormat withRecordSeparator(final String recordSeparator) {
return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
}
/**
* Sets whether to skip the header record.
*
* @param skipHeaderRecord
* whether to skip the header record.
*
* @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
* @see #withHeader(String...)
*/
public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
    // This instance is left untouched: every current setting is carried over into a
    // new format, and only the skipHeaderRecord flag comes from the argument.
    final CSVFormat copy = new CSVFormat(
            delimiter,
            quoteChar,
            quotePolicy,
            commentStart,
            escape,
            ignoreSurroundingSpaces,
            ignoreEmptyLines,
            recordSeparator,
            nullString,
            header,
            skipHeaderRecord);
    return copy;
}
}

View File

@ -323,16 +323,19 @@ public class CSVParser implements Iterable<CSVRecord>, Closeable {
Map<String, Integer> hdrMap = null;
String[] formatHeader = this.format.getHeader();
if (formatHeader != null) {
final CSVRecord record = this.nextRecord();
hdrMap = new LinkedHashMap<String, Integer>();
String[] header = null;
if (formatHeader.length == 0) {
// read the header from the first line of the file
final CSVRecord record = this.nextRecord();
if (record != null) {
header = record.values();
}
} else {
if (this.format.getSkipHeaderRecord()) {
this.nextRecord();
}
header = formatHeader;
}

View File

@ -509,7 +509,8 @@ public class CSVParserTest {
@Test
public void testSkipSetHeader() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("a", "b", "c").parse(in).iterator();
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord(true)
.parse(in).iterator();
final CSVRecord record = records.next();
assertEquals("1", record.get("a"));
assertEquals("2", record.get("b"));
@ -549,7 +550,7 @@ public class CSVParserTest {
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in).iterator();
for (int i = 0; i < 2; i++) {
for (int i = 0; i < 3; i++) {
assertTrue(records.hasNext());
final CSVRecord record = records.next();
assertTrue(record.isMapped("A"));
@ -588,7 +589,8 @@ public class CSVParserTest {
@Test
public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception {
final Reader in = new StringReader("a,b,c\n1,2\nx,y,z");
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in).iterator();
final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord(true)
.parse(in).iterator();
CSVRecord record;
// 1st record
@ -631,7 +633,7 @@ public class CSVParserTest {
final Iterator<CSVRecord> records = parser.iterator();
// Parse to make sure getHeaderMap did not have a side-effect.
for (int i = 0; i < 2; i++) {
for (int i = 0; i < 3; i++) {
assertTrue(records.hasNext());
final CSVRecord record = records.next();
assertEquals(record.get(0), record.get("A"));