diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 6bf3ed18..77a009dc 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -188,8 +188,6 @@ public final class CSVFormat implements Serializable {
return new Builder(csvFormat);
}
- private boolean allowDuplicateHeaderNames;
-
private boolean allowMissingColumnNames;
private boolean autoFlush;
@@ -198,6 +196,8 @@ public final class CSVFormat implements Serializable {
private String delimiter;
+ private DuplicateHeaderMode duplicateHeaderMode;
+
private Character escapeCharacter;
private String[] headerComments;
@@ -245,7 +245,7 @@ public final class CSVFormat implements Serializable {
this.trim = csvFormat.trim;
this.autoFlush = csvFormat.autoFlush;
this.quotedNullString = csvFormat.quotedNullString;
- this.allowDuplicateHeaderNames = csvFormat.allowDuplicateHeaderNames;
+ this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
}
/**
@@ -262,12 +262,26 @@ public final class CSVFormat implements Serializable {
*
* @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
* @return This instance.
+ * @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}: {@code true} is {@code ALLOW_ALL}, {@code false} is {@code ALLOW_EMPTY}.
*/
+ @Deprecated
public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
- this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
+ final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
+ setDuplicateHeaderMode(mode);
return this;
}
+ /**
+  * Sets the duplicate header names behavior.
+  *
+  * @param duplicateHeaderMode the duplicate header names behavior, must not be {@code null}.
+  * @return This instance.
+  * @throws NullPointerException if {@code duplicateHeaderMode} is {@code null}.
+  * @since 1.9.0
+  */
+ public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
+     // Fail fast: a null mode matches neither ALLOW_ALL nor ALLOW_EMPTY, so it would silently behave like DISALLOW.
+     this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode");
+     return this;
+ }
+
/**
* Sets the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
* {@link IllegalArgumentException} to be thrown.
@@ -760,7 +774,8 @@ public final class CSVFormat implements Serializable {
}
/**
- * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines.
+ * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing
+ * empty lines.
*
*
* The {@link Builder} settings are:
@@ -770,13 +785,13 @@ public final class CSVFormat implements Serializable {
*
{@code setQuote('"')}
* {@code setRecordSeparator("\r\n")}
* {@code setIgnoreEmptyLines(true)}
- * {@code setAllowDuplicateHeaderNames(true)}
+ * {@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}
*
*
* @see Predefined#Default
*/
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
- false, false, false, true);
+ false, false, false, DuplicateHeaderMode.ALLOW_ALL);
/**
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
@@ -799,7 +814,7 @@ public final class CSVFormat implements Serializable {
* {@code setRecordSeparator("\r\n")}
* {@code setIgnoreEmptyLines(false)}
* {@code setAllowMissingColumnNames(true)}
- * {@code setAllowDuplicateHeaderNames(true)}
+ * {@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}
*
*
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
@@ -1220,7 +1235,7 @@ public final class CSVFormat implements Serializable {
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
- true);
+ DuplicateHeaderMode.ALLOW_ALL);
}
static String[] toStringArray(final Object[] values) {
@@ -1262,7 +1277,7 @@ public final class CSVFormat implements Serializable {
return CSVFormat.Predefined.valueOf(format).getFormat();
}
- private final boolean allowDuplicateHeaderNames;
+ private final DuplicateHeaderMode duplicateHeaderMode;
private final boolean allowMissingColumnNames;
@@ -1319,7 +1334,7 @@ public final class CSVFormat implements Serializable {
this.trim = builder.trim;
this.autoFlush = builder.autoFlush;
this.quotedNullString = builder.quotedNullString;
- this.allowDuplicateHeaderNames = builder.allowDuplicateHeaderNames;
+ this.duplicateHeaderMode = builder.duplicateHeaderMode;
validate();
}
@@ -1343,14 +1358,14 @@ public final class CSVFormat implements Serializable {
* @param trim TODO Doc me.
* @param trailingDelimiter TODO Doc me.
* @param autoFlush TODO Doc me.
- * @param allowDuplicateHeaderNames TODO Doc me.
+ * @param duplicateHeaderMode the behavior when handling duplicate headers
* @throws IllegalArgumentException if the delimiter is a line break character.
*/
private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape,
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
- final boolean allowDuplicateHeaderNames) {
+ final DuplicateHeaderMode duplicateHeaderMode) {
this.delimiter = delimiter;
this.quoteCharacter = quoteChar;
this.quoteMode = quoteMode;
@@ -1369,7 +1384,7 @@ public final class CSVFormat implements Serializable {
this.trim = trim;
this.autoFlush = autoFlush;
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
- this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
+ this.duplicateHeaderMode = duplicateHeaderMode;
validate();
}
@@ -1416,7 +1431,7 @@ public final class CSVFormat implements Serializable {
return false;
}
final CSVFormat other = (CSVFormat) obj;
- return allowDuplicateHeaderNames == other.allowDuplicateHeaderNames && allowMissingColumnNames == other.allowMissingColumnNames &&
+ return duplicateHeaderMode == other.duplicateHeaderMode && allowMissingColumnNames == other.allowMissingColumnNames &&
autoFlush == other.autoFlush && Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(header, other.header) &&
Arrays.equals(headerComments, other.headerComments) && ignoreEmptyLines == other.ignoreEmptyLines &&
@@ -1450,9 +1465,21 @@ public final class CSVFormat implements Serializable {
*
* @return whether duplicate header names are allowed
* @since 1.7
+ * @deprecated Use {@link #getDuplicateHeaderMode()}; this method returns {@code true} only for {@link DuplicateHeaderMode#ALLOW_ALL}.
*/
+ @Deprecated
public boolean getAllowDuplicateHeaderNames() {
- return allowDuplicateHeaderNames;
+ return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
+ }
+
+ /**
+ * Gets how duplicate headers are handled.
+ *
+ * @return the mode stating whether duplicate header names are allowed, allowed only when empty, or disallowed.
+ * @since 1.9.0
+ */
+ public DuplicateHeaderMode getDuplicateHeaderMode() {
+ return duplicateHeaderMode;
}
/**
@@ -1633,7 +1660,7 @@ public final class CSVFormat implements Serializable {
int result = 1;
result = prime * result + Arrays.hashCode(header);
result = prime * result + Arrays.hashCode(headerComments);
- return prime * result + Objects.hash(allowDuplicateHeaderNames, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
+ return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
skipHeaderRecord, trailingDelimiter, trim);
}
@@ -2235,7 +2262,7 @@ public final class CSVFormat implements Serializable {
}
// validate header
- if (header != null && !allowDuplicateHeaderNames) {
+ if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
final Set dupCheck = new HashSet<>();
for (final String hdr : header) {
if (!dupCheck.add(hdr)) {
@@ -2254,7 +2281,7 @@ public final class CSVFormat implements Serializable {
*/
@Deprecated
public CSVFormat withAllowDuplicateHeaderNames() {
- return builder().setAllowDuplicateHeaderNames(true).build();
+ return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build();
}
/**
@@ -2267,7 +2294,8 @@ public final class CSVFormat implements Serializable {
*/
@Deprecated
public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
- return builder().setAllowDuplicateHeaderNames(allowDuplicateHeaderNames).build();
+ final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
+ return builder().setDuplicateHeaderMode(mode).build();
}
/**
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 60ecc737..58cdb146 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -497,12 +497,16 @@ public final class CSVParser implements Iterable, Closeable {
throw new IllegalArgumentException(
"A header name is missing in " + Arrays.toString(headerRecord));
}
- // Note: This will always allow a duplicate header if the header is empty
+
final boolean containsHeader = header != null && hdrMap.containsKey(header);
- if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
+ final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode();
+ final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
+ final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
+
+ if (containsHeader && !duplicatesAllowed && !(emptyHeader && emptyDuplicatesAllowed)) {
throw new IllegalArgumentException(
String.format(
- "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
+ "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
header, Arrays.toString(headerRecord)));
}
if (header != null) {
diff --git a/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java
new file mode 100644
index 00000000..e623adaa
--- /dev/null
+++ b/src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+/**
+ * Determines how duplicate header fields should be handled if {@link CSVFormat#withHeader(String...)}
+ * is not null; set with {@link CSVFormat.Builder#setDuplicateHeaderMode(DuplicateHeaderMode)}.
+ *
+ * @since 1.9.0
+ */
+public enum DuplicateHeaderMode {
+
+ /**
+ * Allows all duplicate headers; {@link CSVFormat#DEFAULT} uses this mode.
+ */
+ ALLOW_ALL,
+
+ /**
+ * Allows duplicate headers only if they're empty strings or null; the legacy boolean setting {@code false} maps to this mode.
+ */
+ ALLOW_EMPTY,
+
+ /**
+ * Disallows duplicate headers entirely, including repeated empty names.
+ */
+ DISALLOW
+}
diff --git a/src/site/resources/checkstyle/checkstyle-suppressions.xml b/src/site/resources/checkstyle/checkstyle-suppressions.xml
index 402525fd..abff74c8 100644
--- a/src/site/resources/checkstyle/checkstyle-suppressions.xml
+++ b/src/site/resources/checkstyle/checkstyle-suppressions.xml
@@ -19,5 +19,5 @@
"-//Checkstyle//DTD SuppressionFilter Configuration 1.2//EN"
"https://checkstyle.org/dtds/suppressions_1_2.dtd">
-
+
diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
index 682764f9..f7c32bd6 100644
--- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
@@ -260,6 +260,10 @@ public class CSVFormatTest {
final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL);
final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL);
assertNotEquals(name, type, a, b);
+ } else if ("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) {
+ final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.ALLOW_ALL});
+ final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.DISALLOW});
+ assertNotEquals(name, type, a, b);
} else if ("java.lang.Object[]".equals(type)){
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {null, null}});
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {new Object(), new Object()}});
@@ -1295,6 +1299,15 @@ public class CSVFormatTest {
}
+ @Test
+ public void testWithEmptyDuplicates() {
+     // ALLOW_EMPTY must be reported as-is by the new getter and as "not allowed" by the legacy boolean getter.
+     final DuplicateHeaderMode expectedMode = DuplicateHeaderMode.ALLOW_EMPTY;
+     final CSVFormat allowEmptyFormat = CSVFormat.DEFAULT
+         .builder()
+         .setDuplicateHeaderMode(expectedMode)
+         .build();
+     assertEquals(expectedMode, allowEmptyFormat.getDuplicateHeaderMode());
+     assertFalse(allowEmptyFormat.getAllowDuplicateHeaderNames());
+ }
+
@Test
public void testWithEscapeCRThrowsExceptions() {
assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR));
diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java
new file mode 100644
index 00000000..bbbb2624
--- /dev/null
+++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv.issues;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.DuplicateHeaderMode;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+/**
+ * When {@link CSVFormat#withHeader(String...)} is not null, duplicate empty header names must be rejected
+ * under {@link DuplicateHeaderMode#DISALLOW} and accepted under {@link DuplicateHeaderMode#ALLOW_EMPTY}.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/CSV-264">Jira Ticket</a>
+ */
+public class JiraCsv264Test {
+    /** A CSV file whose first and last header names are both empty. */
+    private static final String CSV_STRING = "\"\",\"B\",\"\"\n" +
+        "\"1\",\"2\",\"3\"\n" +
+        "\"4\",\"5\",\"6\"";
+
+    /**
+     * A CSV file whose header has two adjacent empty names (a gap) in the middle.
+     */
+    private static final String CSV_STRING_GAP = "\"A\",\"B\",\"\",\"\",\"E\"\n" +
+        "\"1\",\"2\",\"\",\"\",\"5\"\n" +
+        "\"6\",\"7\",\"\",\"\",\"10\"";
+
+    @Test
+    public void testJiraCsv264() throws IOException {
+        final CSVFormat csvFormat = CSVFormat.DEFAULT
+            .builder()
+            .setHeader()
+            .setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW)
+            .setAllowMissingColumnNames(true)
+            .build();
+        // DISALLOW must reject the header row because the empty name appears twice.
+        try (StringReader reader = new StringReader(CSV_STRING)) {
+            assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader));
+        }
+    }
+
+    @Test
+    public void testJiraCsv264WithGapAllowEmpty() throws IOException {
+        final CSVFormat csvFormat = CSVFormat.DEFAULT
+            .builder()
+            .setHeader()
+            .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY)
+            .setAllowMissingColumnNames(true)
+            .build();
+        // ALLOW_EMPTY tolerates the repeated empty names, so parsing must not throw; close the parser it returns.
+        try (StringReader reader = new StringReader(CSV_STRING_GAP)) {
+            csvFormat.parse(reader).close();
+        }
+    }
+
+    @Test
+    public void testJiraCsv264WithGapDisallow() throws IOException {
+        final CSVFormat csvFormat = CSVFormat.DEFAULT
+            .builder()
+            .setHeader()
+            .setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW)
+            .setAllowMissingColumnNames(true)
+            .build();
+        // DISALLOW must reject the header row because the two gap columns share the empty name.
+        try (StringReader reader = new StringReader(CSV_STRING_GAP)) {
+            assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader));
+        }
+    }
+}