diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 6bf3ed18..77a009dc 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -188,8 +188,6 @@ public final class CSVFormat implements Serializable { return new Builder(csvFormat); } - private boolean allowDuplicateHeaderNames; - private boolean allowMissingColumnNames; private boolean autoFlush; @@ -198,6 +196,8 @@ public final class CSVFormat implements Serializable { private String delimiter; + private DuplicateHeaderMode duplicateHeaderMode; + private Character escapeCharacter; private String[] headerComments; @@ -245,7 +245,7 @@ public final class CSVFormat implements Serializable { this.trim = csvFormat.trim; this.autoFlush = csvFormat.autoFlush; this.quotedNullString = csvFormat.quotedNullString; - this.allowDuplicateHeaderNames = csvFormat.allowDuplicateHeaderNames; + this.duplicateHeaderMode = csvFormat.duplicateHeaderMode; } /** @@ -262,12 +262,26 @@ public final class CSVFormat implements Serializable { * * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. * @return This instance. + * @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}. */ + @Deprecated public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { - this.allowDuplicateHeaderNames = allowDuplicateHeaderNames; + final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY; + setDuplicateHeaderMode(mode); return this; } + /** + * Sets the duplicate header names behavior. + * + * @param duplicateHeaderMode the duplicate header names behavior + * @return This instance. + */ + public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) { + this.duplicateHeaderMode = duplicateHeaderMode; + return this; + } + /** * Sets the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an * {@link IllegalArgumentException} to be thrown. @@ -760,7 +774,8 @@ public final class CSVFormat implements Serializable { } /** - * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines. + * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing + * empty lines. * *

* The {@link Builder} settings are: @@ -770,13 +785,13 @@ public final class CSVFormat implements Serializable { *

  • {@code setQuote('"')}
  • *
  • {@code setRecordSeparator("\r\n")}
  • *
  • {@code setIgnoreEmptyLines(true)}
  • - *
  • {@code setAllowDuplicateHeaderNames(true)}
  • + *
  • {@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}
  • * * * @see Predefined#Default */ public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false, - false, false, false, true); + false, false, false, DuplicateHeaderMode.ALLOW_ALL); /** * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary @@ -799,7 +814,7 @@ public final class CSVFormat implements Serializable { *
  • {@code setRecordSeparator("\r\n")}
  • *
  • {@code setIgnoreEmptyLines(false)}
  • *
  • {@code setAllowMissingColumnNames(true)}
  • - *
  • {@code setAllowDuplicateHeaderNames(true)}
  • + *
  • {@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}
  • * *

    * Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and @@ -1220,7 +1235,7 @@ public final class CSVFormat implements Serializable { */ public static CSVFormat newFormat(final char delimiter) { return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false, - true); + DuplicateHeaderMode.ALLOW_ALL); } static String[] toStringArray(final Object[] values) { @@ -1262,7 +1277,7 @@ public final class CSVFormat implements Serializable { return CSVFormat.Predefined.valueOf(format).getFormat(); } - private final boolean allowDuplicateHeaderNames; + private final DuplicateHeaderMode duplicateHeaderMode; private final boolean allowMissingColumnNames; @@ -1319,7 +1334,7 @@ public final class CSVFormat implements Serializable { this.trim = builder.trim; this.autoFlush = builder.autoFlush; this.quotedNullString = builder.quotedNullString; - this.allowDuplicateHeaderNames = builder.allowDuplicateHeaderNames; + this.duplicateHeaderMode = builder.duplicateHeaderMode; validate(); } @@ -1343,14 +1358,14 @@ public final class CSVFormat implements Serializable { * @param trim TODO Doc me. * @param trailingDelimiter TODO Doc me. * @param autoFlush TODO Doc me. - * @param allowDuplicateHeaderNames TODO Doc me. + * @param duplicateHeaderMode the behavior when handling duplicate headers * @throws IllegalArgumentException if the delimiter is a line break character. */ private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush, - final boolean allowDuplicateHeaderNames) { + final DuplicateHeaderMode duplicateHeaderMode) { this.delimiter = delimiter; this.quoteCharacter = quoteChar; this.quoteMode = quoteMode; @@ -1369,7 +1384,7 @@ public final class CSVFormat implements Serializable { this.trim = trim; this.autoFlush = autoFlush; this.quotedNullString = quoteCharacter + nullString + quoteCharacter; - this.allowDuplicateHeaderNames = allowDuplicateHeaderNames; + this.duplicateHeaderMode = duplicateHeaderMode; validate(); } @@ -1416,7 +1431,7 @@ public final class CSVFormat implements Serializable { return false; } final CSVFormat other = (CSVFormat) obj; - return allowDuplicateHeaderNames == other.allowDuplicateHeaderNames && allowMissingColumnNames == other.allowMissingColumnNames && + return duplicateHeaderMode == other.duplicateHeaderMode && allowMissingColumnNames == other.allowMissingColumnNames && autoFlush == other.autoFlush && Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) && Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(header, other.header) && Arrays.equals(headerComments, other.headerComments) && ignoreEmptyLines == other.ignoreEmptyLines && @@ -1450,9 +1465,21 @@ public final class CSVFormat implements Serializable { * * @return whether duplicate header names are allowed * @since 1.7 + * @deprecated Use {@link #getDuplicateHeaderMode()}. */ + @Deprecated public boolean getAllowDuplicateHeaderNames() { - return allowDuplicateHeaderNames; + return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL; + } + + /** + * Gets how duplicate headers are handled. + * + * @return if duplicate header values are allowed, allowed conditionally, or disallowed. + * @since 1.9.0 + */ + public DuplicateHeaderMode getDuplicateHeaderMode() { + return duplicateHeaderMode; } /** @@ -1633,7 +1660,7 @@ public final class CSVFormat implements Serializable { int result = 1; result = prime * result + Arrays.hashCode(header); result = prime * result + Arrays.hashCode(headerComments); - return prime * result + Objects.hash(allowDuplicateHeaderNames, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter, + return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter, ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator, skipHeaderRecord, trailingDelimiter, trim); } @@ -2235,7 +2262,7 @@ public final class CSVFormat implements Serializable { } // validate header - if (header != null && !allowDuplicateHeaderNames) { + if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) { final Set dupCheck = new HashSet<>(); for (final String hdr : header) { if (!dupCheck.add(hdr)) { @@ -2254,7 +2281,7 @@ public final class CSVFormat implements Serializable { */ @Deprecated public CSVFormat withAllowDuplicateHeaderNames() { - return builder().setAllowDuplicateHeaderNames(true).build(); + return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build(); } /** @@ -2267,7 +2294,8 @@ public final class CSVFormat implements Serializable { */ @Deprecated public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { - return builder().setAllowDuplicateHeaderNames(allowDuplicateHeaderNames).build(); + final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY; + return builder().setDuplicateHeaderMode(mode).build(); } /** diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 60ecc737..58cdb146 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -497,12 +497,16 @@ public final class CSVParser implements Iterable, Closeable { throw new IllegalArgumentException( "A header name is missing in " + Arrays.toString(headerRecord)); } - // Note: This will always allow a duplicate header if the header is empty + final boolean containsHeader = header != null && hdrMap.containsKey(header); - if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) { + final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode(); + final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL; + final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY; + + if (containsHeader && !duplicatesAllowed && !(emptyHeader && emptyDuplicatesAllowed)) { throw new IllegalArgumentException( String.format( - "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().", + "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().", header, Arrays.toString(headerRecord))); } if (header != null) { diff --git a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java new file mode 100644 index 00000000..e623adaa --- /dev/null +++ b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.csv; + +/** + * Determines how duplicate header fields should be handled + * if {@link CSVFormat#withHeader(String...)} is not null. + * + * @since 1.9.0 + */ +public enum DuplicateHeaderMode { + + /** + * Allows all duplicate headers. + */ + ALLOW_ALL, + + /** + * Allows duplicate headers only if they're empty strings or null. + */ + ALLOW_EMPTY, + + /** + * Disallows duplicate headers entirely. + */ + DISALLOW +} diff --git a/src/site/resources/checkstyle/checkstyle-suppressions.xml b/src/site/resources/checkstyle/checkstyle-suppressions.xml index 402525fd..abff74c8 100644 --- a/src/site/resources/checkstyle/checkstyle-suppressions.xml +++ b/src/site/resources/checkstyle/checkstyle-suppressions.xml @@ -19,5 +19,5 @@ "-//Checkstyle//DTD SuppressionFilter Configuration 1.2//EN" "https://checkstyle.org/dtds/suppressions_1_2.dtd"> - + diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java index 682764f9..f7c32bd6 100644 --- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java @@ -260,6 +260,10 @@ public class CSVFormatTest { final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL); final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL); assertNotEquals(name, type, a, b); + } else if ("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) { + final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.ALLOW_ALL}); + final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.DISALLOW}); + assertNotEquals(name, type, a, b); } else if ("java.lang.Object[]".equals(type)){ final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {null, null}}); final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {new Object(), new Object()}}); @@ -1295,6 +1299,15 @@ public class CSVFormatTest { } + @Test + public void testWithEmptyDuplicates() { + final CSVFormat formatWithEmptyDuplicates = + CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).build(); + + assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, formatWithEmptyDuplicates.getDuplicateHeaderMode()); + assertFalse(formatWithEmptyDuplicates.getAllowDuplicateHeaderNames()); + } + @Test public void testWithEscapeCRThrowsExceptions() { assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR)); diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java new file mode 100644 index 00000000..bbbb2624 --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.csv.issues; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.DuplicateHeaderMode; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.io.IOException; +import java.io.StringReader; + +/** + * When {@link CSVFormat#withHeader(String...)} is not null; duplicate headers + * with empty strings should not be allowed. + * + * @see Jira Ticker + */ +public class JiraCsv264Test { + + private static final String CSV_STRING = "\"\",\"B\",\"\"\n" + + "\"1\",\"2\",\"3\"\n" + + "\"4\",\"5\",\"6\""; + + /** + * A CSV file with a random gap in the middle. + */ + private static final String CSV_STRING_GAP = "\"A\",\"B\",\"\",\"\",\"E\"\n" + + "\"1\",\"2\",\"\",\"\",\"5\"\n" + + "\"6\",\"7\",\"\",\"\",\"10\""; + + @Test + public void testJiraCsv264() throws IOException { + final CSVFormat csvFormat = CSVFormat.DEFAULT + .builder() + .setHeader() + .setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW) + .setAllowMissingColumnNames(true) + .build(); + + try (StringReader reader = new StringReader(CSV_STRING)) { + assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader)); + } + } + + @Test + public void testJiraCsv264WithGapAllowEmpty() throws IOException { + final CSVFormat csvFormat = CSVFormat.DEFAULT + .builder() + .setHeader() + .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY) + .setAllowMissingColumnNames(true) + .build(); + + try (StringReader reader = new StringReader(CSV_STRING_GAP)) { + csvFormat.parse(reader); + } + } + + @Test + public void testJiraCsv264WithGapDisallow() throws IOException { + final CSVFormat csvFormat = CSVFormat.DEFAULT + .builder() + .setHeader() + .setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW) + .setAllowMissingColumnNames(true) + .build(); + + try (StringReader reader = new StringReader(CSV_STRING_GAP)) { + assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader)); + } + } +}