Revert back to NOT skipping a record when withHeader is called with a non-empty array. Add skipHeaderRecord setting to CSVFormat and use when headers are initialized.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1508933 13f79535-47bb-0310-9956-ffa450edef68
2025-02-17 07:26:32 +00:00 · 2013-07-31 15:39:57 +00:00 · 2013-07-31 15:39:57 +00:00 · 97d34575a1
commit 97d34575a1
parent e6759b8c10
3 changed files with 54 additions and 26 deletions
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@ -53,6 +53,7 @@ import java.util.Set;
 public class CSVFormat implements Serializable {

    private static final long serialVersionUID = 1L;
+    
    /**
     * Returns true if the given character is a line break character.
     *
@ -76,6 +77,7 @@ public class CSVFormat implements Serializable {
    private final String recordSeparator; // for outputs
    private final String nullString;
    private final String[] header;
+    private final boolean skipHeaderRecord;

    /**
     * Standard comma separated format, as for {@link #RFC4180} but allowing empty lines.
@ -90,7 +92,7 @@ public class CSVFormat implements Serializable {
     * <li>withIgnoreEmptyLines(true)</li>
     * </ul>
     */
-    public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null);
+    public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, false);

    /**
     * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
@ -166,7 +168,7 @@ public class CSVFormat implements Serializable {
     * @throws IllegalArgumentException if the delimiter is a line break character
     */
    public static CSVFormat newFormat(final char delimiter) {
-        return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null);
+        return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, false);
    }

    /**
@ -203,6 +205,7 @@ public class CSVFormat implements Serializable {
     *            the line separator to use for output
     * @param header
     *            the header
+     * @param skipHeaderRecord whether the parser skips the first record when the header is specified explicitly
     * @throws IllegalArgumentException if the delimiter is a line break character
     */
    // package protected to give access without needing a synthetic accessor
@ -210,7 +213,7 @@ public class CSVFormat implements Serializable {
            final Quote quotePolicy, final Character commentStart,
            final Character escape, final boolean ignoreSurroundingSpaces,
            final boolean ignoreEmptyLines, final String recordSeparator,
-            final String nullString, final String[] header) {
+            final String nullString, final String[] header, boolean skipHeaderRecord) {
        if (isLineBreak(delimiter)) {
            throw new IllegalArgumentException("The delimiter cannot be a line break");
        }
@ -224,12 +227,13 @@ public class CSVFormat implements Serializable {
        this.recordSeparator = recordSeparator;
        this.nullString = nullString;
        this.header = header == null ? null : header.clone();
+        this.skipHeaderRecord = skipHeaderRecord;
    }

    CSVFormat(final CSVFormat format) {
        this(format.getDelimiter(), format.getQuoteChar(), format.getQuotePolicy(), format.getCommentStart(),
                format.getEscape(), format.getIgnoreSurroundingSpaces(), format.getIgnoreEmptyLines(),
-                format.getRecordSeparator(), format.getNullString(), format.getHeader());
+                format.getRecordSeparator(), format.getNullString(), format.getHeader(), format.getSkipHeaderRecord());
    }

    @Override
@ -409,6 +413,15 @@ public class CSVFormat implements Serializable {
        return recordSeparator;
    }

+    /**
+     * Returns whether to skip the header record.
+     *
+     * @return whether to skip the header record.
+     */
+    public boolean getSkipHeaderRecord() {
+        return skipHeaderRecord;
+    }
+
    @Override
    public int hashCode()
    {
@ -573,7 +586,7 @@ public class CSVFormat implements Serializable {
            throw new IllegalArgumentException("The comment start character cannot be a line break");
        }
        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
-                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
    }

    /**
@ -590,7 +603,7 @@ public class CSVFormat implements Serializable {
            throw new IllegalArgumentException("The delimiter cannot be a line break");
        }
        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
-                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
    }

    /**
@ -620,33 +633,29 @@ public class CSVFormat implements Serializable {
            throw new IllegalArgumentException("The escape character cannot be a line break");
        }
        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
-                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
    }

    /**
     * Sets the header of the format. The header can either be parsed automatically from the input file with:
     * 
     * <pre>
-     * CSVFormat format = aformat.withHeader();
-     * </pre>
+     * CSVFormat format = aformat.withHeader();</pre>
     * 
     * or specified manually with:
     * 
     * <pre>
-     * CSVFormat format = aformat.withHeader(&quot;name&quot;, &quot;email&quot;, &quot;phone&quot;);
-     * </pre>
-     * 
-     * When this option is is set to any non-null value, the first record is the first <em>data</em> record, not the
-     * header record.
+     * CSVFormat format = aformat.withHeader(&quot;name&quot;, &quot;email&quot;, &quot;phone&quot;);</pre>
     * 
     * @param header
     *            the header, <tt>null</tt> if disabled, empty if parsed automatically, user specified otherwise.
     * 
     * @return A new CSVFormat that is equal to this but with the specified header
+     * @see #withSkipHeaderRecord(boolean)
     */
    public CSVFormat withHeader(final String... header) {
        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
-                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
    }

    /**
@ -659,7 +668,7 @@ public class CSVFormat implements Serializable {
     */
    public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
-                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
    }

    /**
@ -672,7 +681,7 @@ public class CSVFormat implements Serializable {
     */
    public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
-                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
    }

    /**
@ -692,7 +701,7 @@ public class CSVFormat implements Serializable {
     */
    public CSVFormat withNullString(final String nullString) {
        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
-                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
    }

    /**
@ -722,7 +731,7 @@ public class CSVFormat implements Serializable {
            throw new IllegalArgumentException("The quoteChar cannot be a line break");
        }
        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
-                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
    }

    /**
@ -735,7 +744,7 @@ public class CSVFormat implements Serializable {
     */
    public CSVFormat withQuotePolicy(final Quote quotePolicy) {
        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
-                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
    }

    /**
@ -760,6 +769,20 @@ public class CSVFormat implements Serializable {
     */
    public CSVFormat withRecordSeparator(final String recordSeparator) {
        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
-                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header);
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
+    }
+
+    /**
+     * Sets whether to skip the header record.
+     *
+     * @param skipHeaderRecord
+     *            whether to skip the header record.
+     *
+     * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
+     * @see #withHeader(String...)
+     */
+    public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
+        return new CSVFormat(delimiter, quoteChar, quotePolicy, commentStart, escape,
+                ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, header, skipHeaderRecord);
    }
 }
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@ -323,16 +323,19 @@ public class CSVParser implements Iterable<CSVRecord>, Closeable {
        Map<String, Integer> hdrMap = null;
        String[] formatHeader = this.format.getHeader();
        if (formatHeader != null) {
-            final CSVRecord record = this.nextRecord();
            hdrMap = new LinkedHashMap<String, Integer>();

            String[] header = null;
            if (formatHeader.length == 0) {
                // read the header from the first line of the file
+                final CSVRecord record = this.nextRecord();
                if (record != null) {
                    header = record.values();
                }
            } else {
+                if (this.format.getSkipHeaderRecord()) {
+                    this.nextRecord();
+                }
                header = formatHeader;
            }

--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@ -509,7 +509,8 @@ public class CSVParserTest {
    @Test
    public void testSkipSetHeader() throws Exception {
        final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");
-        final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("a", "b", "c").parse(in).iterator();
+        final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("a", "b", "c").withSkipHeaderRecord(true)
+                .parse(in).iterator();
        final CSVRecord record = records.next();
        assertEquals("1", record.get("a"));
        assertEquals("2", record.get("b"));
@ -549,7 +550,7 @@ public class CSVParserTest {

        final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in).iterator();

-        for (int i = 0; i < 2; i++) {
+        for (int i = 0; i < 3; i++) {
            assertTrue(records.hasNext());
            final CSVRecord record = records.next();
            assertTrue(record.isMapped("A"));
@ -588,7 +589,8 @@ public class CSVParserTest {
    @Test
    public void testMappedButNotSetAsOutlook2007ContactExport() throws Exception {
        final Reader in = new StringReader("a,b,c\n1,2\nx,y,z");
-        final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("A", "B", "C").parse(in).iterator();
+        final Iterator<CSVRecord> records = CSVFormat.DEFAULT.withHeader("A", "B", "C").withSkipHeaderRecord(true)
+                .parse(in).iterator();
        CSVRecord record;

        // 1st record
@ -631,7 +633,7 @@ public class CSVParserTest {
        final Iterator<CSVRecord> records = parser.iterator();

        // Parse to make sure getHeaderMap did not have a side-effect.
-        for (int i = 0; i < 2; i++) {
+        for (int i = 0; i < 3; i++) {
            assertTrue(records.hasNext());
            final CSVRecord record = records.next();
            assertEquals(record.get(0), record.get("A"));