CSV-264: Added DuplicateHeaderMode for flexibility with header strictness. (#114)
* csv-264: added duplicateheadermode for flexibility with header strictness * fix: use assertthrows and update docs
This commit is contained in:
parent
caa59a3c09
commit
4fdfc59b1c
|
@ -188,8 +188,6 @@ public final class CSVFormat implements Serializable {
|
|||
return new Builder(csvFormat);
|
||||
}
|
||||
|
||||
private boolean allowDuplicateHeaderNames;
|
||||
|
||||
private boolean allowMissingColumnNames;
|
||||
|
||||
private boolean autoFlush;
|
||||
|
@ -198,6 +196,8 @@ public final class CSVFormat implements Serializable {
|
|||
|
||||
private String delimiter;
|
||||
|
||||
private DuplicateHeaderMode duplicateHeaderMode;
|
||||
|
||||
private Character escapeCharacter;
|
||||
|
||||
private String[] headerComments;
|
||||
|
@ -245,7 +245,7 @@ public final class CSVFormat implements Serializable {
|
|||
this.trim = csvFormat.trim;
|
||||
this.autoFlush = csvFormat.autoFlush;
|
||||
this.quotedNullString = csvFormat.quotedNullString;
|
||||
this.allowDuplicateHeaderNames = csvFormat.allowDuplicateHeaderNames;
|
||||
this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -262,12 +262,26 @@ public final class CSVFormat implements Serializable {
|
|||
*
|
||||
* @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
|
||||
* @return This instance.
|
||||
* @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}.
|
||||
*/
|
||||
@Deprecated
|
||||
public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
|
||||
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
|
||||
final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
|
||||
setDuplicateHeaderMode(mode);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the duplicate header names behavior.
|
||||
*
|
||||
* @param duplicateHeaderMode the duplicate header names behavior
|
||||
* @return This instance.
|
||||
*/
|
||||
public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
|
||||
this.duplicateHeaderMode = duplicateHeaderMode;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
|
||||
* {@link IllegalArgumentException} to be thrown.
|
||||
|
@ -760,7 +774,8 @@ public final class CSVFormat implements Serializable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines.
|
||||
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing
|
||||
* empty lines.
|
||||
*
|
||||
* <p>
|
||||
* The {@link Builder} settings are:
|
||||
|
@ -770,13 +785,13 @@ public final class CSVFormat implements Serializable {
|
|||
* <li>{@code setQuote('"')}</li>
|
||||
* <li>{@code setRecordSeparator("\r\n")}</li>
|
||||
* <li>{@code setIgnoreEmptyLines(true)}</li>
|
||||
* <li>{@code setAllowDuplicateHeaderNames(true)}</li>
|
||||
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
|
||||
* </ul>
|
||||
*
|
||||
* @see Predefined#Default
|
||||
*/
|
||||
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
|
||||
false, false, false, true);
|
||||
false, false, false, DuplicateHeaderMode.ALLOW_ALL);
|
||||
|
||||
/**
|
||||
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
|
||||
|
@ -799,7 +814,7 @@ public final class CSVFormat implements Serializable {
|
|||
* <li>{@code setRecordSeparator("\r\n")}</li>
|
||||
* <li>{@code setIgnoreEmptyLines(false)}</li>
|
||||
* <li>{@code setAllowMissingColumnNames(true)}</li>
|
||||
* <li>{@code setAllowDuplicateHeaderNames(true)}</li>
|
||||
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
|
||||
|
@ -1220,7 +1235,7 @@ public final class CSVFormat implements Serializable {
|
|||
*/
|
||||
public static CSVFormat newFormat(final char delimiter) {
|
||||
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
|
||||
true);
|
||||
DuplicateHeaderMode.ALLOW_ALL);
|
||||
}
|
||||
|
||||
static String[] toStringArray(final Object[] values) {
|
||||
|
@ -1262,7 +1277,7 @@ public final class CSVFormat implements Serializable {
|
|||
return CSVFormat.Predefined.valueOf(format).getFormat();
|
||||
}
|
||||
|
||||
private final boolean allowDuplicateHeaderNames;
|
||||
private final DuplicateHeaderMode duplicateHeaderMode;
|
||||
|
||||
private final boolean allowMissingColumnNames;
|
||||
|
||||
|
@ -1319,7 +1334,7 @@ public final class CSVFormat implements Serializable {
|
|||
this.trim = builder.trim;
|
||||
this.autoFlush = builder.autoFlush;
|
||||
this.quotedNullString = builder.quotedNullString;
|
||||
this.allowDuplicateHeaderNames = builder.allowDuplicateHeaderNames;
|
||||
this.duplicateHeaderMode = builder.duplicateHeaderMode;
|
||||
validate();
|
||||
}
|
||||
|
||||
|
@ -1343,14 +1358,14 @@ public final class CSVFormat implements Serializable {
|
|||
* @param trim TODO Doc me.
|
||||
* @param trailingDelimiter TODO Doc me.
|
||||
* @param autoFlush TODO Doc me.
|
||||
* @param allowDuplicateHeaderNames TODO Doc me.
|
||||
* @param duplicateHeaderMode the behavior when handling duplicate headers
|
||||
* @throws IllegalArgumentException if the delimiter is a line break character.
|
||||
*/
|
||||
private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape,
|
||||
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
|
||||
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
|
||||
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
|
||||
final boolean allowDuplicateHeaderNames) {
|
||||
final DuplicateHeaderMode duplicateHeaderMode) {
|
||||
this.delimiter = delimiter;
|
||||
this.quoteCharacter = quoteChar;
|
||||
this.quoteMode = quoteMode;
|
||||
|
@ -1369,7 +1384,7 @@ public final class CSVFormat implements Serializable {
|
|||
this.trim = trim;
|
||||
this.autoFlush = autoFlush;
|
||||
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
|
||||
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
|
||||
this.duplicateHeaderMode = duplicateHeaderMode;
|
||||
validate();
|
||||
}
|
||||
|
||||
|
@ -1416,7 +1431,7 @@ public final class CSVFormat implements Serializable {
|
|||
return false;
|
||||
}
|
||||
final CSVFormat other = (CSVFormat) obj;
|
||||
return allowDuplicateHeaderNames == other.allowDuplicateHeaderNames && allowMissingColumnNames == other.allowMissingColumnNames &&
|
||||
return duplicateHeaderMode == other.duplicateHeaderMode && allowMissingColumnNames == other.allowMissingColumnNames &&
|
||||
autoFlush == other.autoFlush && Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
|
||||
Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(header, other.header) &&
|
||||
Arrays.equals(headerComments, other.headerComments) && ignoreEmptyLines == other.ignoreEmptyLines &&
|
||||
|
@ -1450,9 +1465,21 @@ public final class CSVFormat implements Serializable {
|
|||
*
|
||||
* @return whether duplicate header names are allowed
|
||||
* @since 1.7
|
||||
* @deprecated Use {@link #getDuplicateHeaderMode()}.
|
||||
*/
|
||||
@Deprecated
|
||||
public boolean getAllowDuplicateHeaderNames() {
|
||||
return allowDuplicateHeaderNames;
|
||||
return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets how duplicate headers are handled.
|
||||
*
|
||||
* @return if duplicate header values are allowed, allowed conditionally, or disallowed.
|
||||
* @since 1.9.0
|
||||
*/
|
||||
public DuplicateHeaderMode getDuplicateHeaderMode() {
|
||||
return duplicateHeaderMode;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1633,7 +1660,7 @@ public final class CSVFormat implements Serializable {
|
|||
int result = 1;
|
||||
result = prime * result + Arrays.hashCode(header);
|
||||
result = prime * result + Arrays.hashCode(headerComments);
|
||||
return prime * result + Objects.hash(allowDuplicateHeaderNames, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
|
||||
return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
|
||||
ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
|
||||
skipHeaderRecord, trailingDelimiter, trim);
|
||||
}
|
||||
|
@ -2235,7 +2262,7 @@ public final class CSVFormat implements Serializable {
|
|||
}
|
||||
|
||||
// validate header
|
||||
if (header != null && !allowDuplicateHeaderNames) {
|
||||
if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
|
||||
final Set<String> dupCheck = new HashSet<>();
|
||||
for (final String hdr : header) {
|
||||
if (!dupCheck.add(hdr)) {
|
||||
|
@ -2254,7 +2281,7 @@ public final class CSVFormat implements Serializable {
|
|||
*/
|
||||
@Deprecated
|
||||
public CSVFormat withAllowDuplicateHeaderNames() {
|
||||
return builder().setAllowDuplicateHeaderNames(true).build();
|
||||
return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2267,7 +2294,8 @@ public final class CSVFormat implements Serializable {
|
|||
*/
|
||||
@Deprecated
|
||||
public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
|
||||
return builder().setAllowDuplicateHeaderNames(allowDuplicateHeaderNames).build();
|
||||
final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
|
||||
return builder().setDuplicateHeaderMode(mode).build();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -497,12 +497,16 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
throw new IllegalArgumentException(
|
||||
"A header name is missing in " + Arrays.toString(headerRecord));
|
||||
}
|
||||
// Note: This will always allow a duplicate header if the header is empty
|
||||
|
||||
final boolean containsHeader = header != null && hdrMap.containsKey(header);
|
||||
if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
|
||||
final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode();
|
||||
final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
|
||||
final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
|
||||
|
||||
if (containsHeader && !duplicatesAllowed && !(emptyHeader && emptyDuplicatesAllowed)) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format(
|
||||
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
|
||||
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
|
||||
header, Arrays.toString(headerRecord)));
|
||||
}
|
||||
if (header != null) {
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
/**
|
||||
* Determines how duplicate header fields should be handled
|
||||
* if {@link CSVFormat#withHeader(String...)} is not null.
|
||||
*
|
||||
* @since 1.9.0
|
||||
*/
|
||||
public enum DuplicateHeaderMode {
|
||||
|
||||
/**
|
||||
* Allows all duplicate headers.
|
||||
*/
|
||||
ALLOW_ALL,
|
||||
|
||||
/**
|
||||
* Allows duplicate headers only if they're empty strings or null.
|
||||
*/
|
||||
ALLOW_EMPTY,
|
||||
|
||||
/**
|
||||
* Disallows duplicate headers entirely.
|
||||
*/
|
||||
DISALLOW
|
||||
}
|
|
@ -19,5 +19,5 @@
|
|||
"-//Checkstyle//DTD SuppressionFilter Configuration 1.2//EN"
|
||||
"https://checkstyle.org/dtds/suppressions_1_2.dtd">
|
||||
<suppressions>
|
||||
<suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="511"/>
|
||||
<suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="515"/>
|
||||
</suppressions>
|
||||
|
|
|
@ -260,6 +260,10 @@ public class CSVFormatTest {
|
|||
final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL);
|
||||
final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL);
|
||||
assertNotEquals(name, type, a, b);
|
||||
} else if ("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) {
|
||||
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.ALLOW_ALL});
|
||||
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.DISALLOW});
|
||||
assertNotEquals(name, type, a, b);
|
||||
} else if ("java.lang.Object[]".equals(type)){
|
||||
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {null, null}});
|
||||
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {new Object(), new Object()}});
|
||||
|
@ -1295,6 +1299,15 @@ public class CSVFormatTest {
|
|||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testWithEmptyDuplicates() {
|
||||
final CSVFormat formatWithEmptyDuplicates =
|
||||
CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).build();
|
||||
|
||||
assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, formatWithEmptyDuplicates.getDuplicateHeaderMode());
|
||||
assertFalse(formatWithEmptyDuplicates.getAllowDuplicateHeaderNames());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithEscapeCRThrowsExceptions() {
|
||||
assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR));
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv.issues;
|
||||
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.DuplicateHeaderMode;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
||||
/**
|
||||
* When {@link CSVFormat#withHeader(String...)} is not null; duplicate headers
|
||||
* with empty strings should not be allowed.
|
||||
*
|
||||
* @see <a href="https://issues.apache.org/jira/browse/CSV-264">Jira Ticker</a>
|
||||
*/
|
||||
public class JiraCsv264Test {
|
||||
|
||||
private static final String CSV_STRING = "\"\",\"B\",\"\"\n" +
|
||||
"\"1\",\"2\",\"3\"\n" +
|
||||
"\"4\",\"5\",\"6\"";
|
||||
|
||||
/**
|
||||
* A CSV file with a random gap in the middle.
|
||||
*/
|
||||
private static final String CSV_STRING_GAP = "\"A\",\"B\",\"\",\"\",\"E\"\n" +
|
||||
"\"1\",\"2\",\"\",\"\",\"5\"\n" +
|
||||
"\"6\",\"7\",\"\",\"\",\"10\"";
|
||||
|
||||
@Test
|
||||
public void testJiraCsv264() throws IOException {
|
||||
final CSVFormat csvFormat = CSVFormat.DEFAULT
|
||||
.builder()
|
||||
.setHeader()
|
||||
.setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW)
|
||||
.setAllowMissingColumnNames(true)
|
||||
.build();
|
||||
|
||||
try (StringReader reader = new StringReader(CSV_STRING)) {
|
||||
assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJiraCsv264WithGapAllowEmpty() throws IOException {
|
||||
final CSVFormat csvFormat = CSVFormat.DEFAULT
|
||||
.builder()
|
||||
.setHeader()
|
||||
.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY)
|
||||
.setAllowMissingColumnNames(true)
|
||||
.build();
|
||||
|
||||
try (StringReader reader = new StringReader(CSV_STRING_GAP)) {
|
||||
csvFormat.parse(reader);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJiraCsv264WithGapDisallow() throws IOException {
|
||||
final CSVFormat csvFormat = CSVFormat.DEFAULT
|
||||
.builder()
|
||||
.setHeader()
|
||||
.setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW)
|
||||
.setAllowMissingColumnNames(true)
|
||||
.build();
|
||||
|
||||
try (StringReader reader = new StringReader(CSV_STRING_GAP)) {
|
||||
assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader));
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue