CSVFormat: Sanitise empty headers to the empty string ""

Add more tests for duplicate headers including null header names.
This commit is contained in:
Alex Herbert 2022-10-23 16:36:59 +01:00
parent 481d8b1ff3
commit bd48a767cd
2 changed files with 147 additions and 45 deletions

View File

@ -2304,14 +2304,16 @@ public final class CSVFormat implements Serializable {
// Validate headers // Validate headers
if (headers != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) { if (headers != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
final Set<String> dupCheckSet = new HashSet<>(headers.length); final Set<String> dupCheckSet = new HashSet<>(headers.length);
final boolean rejectEmpty = duplicateHeaderMode != DuplicateHeaderMode.ALLOW_EMPTY; final boolean emptyDuplicatesAllowed = duplicateHeaderMode == DuplicateHeaderMode.ALLOW_EMPTY;
for (final String header : headers) { for (String header : headers) {
final boolean blank = isBlank(header); final boolean blank = isBlank(header);
if (rejectEmpty && blank) { // Sanitise all empty headers to the empty string "" when checking duplicates
throw new IllegalArgumentException("Header is empty"); final boolean containsHeader = !dupCheckSet.add(blank ? "" : header);
} if (containsHeader && !(blank && emptyDuplicatesAllowed)) {
if (!blank && !dupCheckSet.add(header)) { throw new IllegalArgumentException(
throw new IllegalArgumentException(String.format("Header '%s' is a duplicate in %s", header, Arrays.toString(headers))); String.format(
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
header, Arrays.toString(headers)));
} }
} }
} }

View File

@ -19,6 +19,7 @@ package org.apache.commons.csv;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
@ -33,22 +34,20 @@ import org.junit.jupiter.params.provider.MethodSource;
public class CSVDuplicateHeaderTest { public class CSVDuplicateHeaderTest {
/** /**
* Return test cases for duplicate header data. Uses the order: * Return test cases for duplicate header data for use in parsing (CSVParser). Uses the order:
* <pre> * <pre>
* DuplicateHeaderMode duplicateHeaderMode * DuplicateHeaderMode duplicateHeaderMode
* boolean allowMissingColumnNames * boolean allowMissingColumnNames
* String[] headers * String[] headers
* boolean valid * boolean valid
* </pre> * </pre>
* <p>
* TODO: Reinstate cases failed by CSVFormat.
* </p>
* *
* @return the stream of arguments * @return the stream of arguments
*/ */
static Stream<Arguments> duplicateHeaderData() { static Stream<Arguments> duplicateHeaderData() {
return Stream.of( return Stream.of(
// Commented out data here are for cases that are only supported for parsing. // TODO: Fix CSVParser which does not sanitise 'empty' names or
// equate null names with empty as duplications
// Any combination with a valid header // Any combination with a valid header
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "B"}, true), Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "B"}, true),
@ -58,6 +57,30 @@ public class CSVDuplicateHeaderTest {
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "B"}, true), Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "B"}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "B"}, true), Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "B"}, true),
// Any combination with a valid header including empty
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", ""}, false),
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", ""}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", ""}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", ""}, true),
// Any combination with a valid header including blank (1 space)
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", " "}, false),
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", " "}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", " "}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", " "}, true),
// Any combination with a valid header including null
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", null}, false),
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", null}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", null}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", null}, true),
// Duplicate non-empty names // Duplicate non-empty names
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A"}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A"}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A"}, false), Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A"}, false),
@ -68,25 +91,56 @@ public class CSVDuplicateHeaderTest {
// Duplicate empty names // Duplicate empty names
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"", ""}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"", ""}, false),
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"", ""}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"", ""}, true), Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"", ""}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"", ""}, true), Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"", ""}, true),
// Duplicate blank names (1 space) // Duplicate blank names (1 space)
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {" ", " "}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {" ", " "}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {" ", " "}, true), Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {" ", " "}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {" ", " "}, true), Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {" ", " "}, true),
// Duplicate blank names (3 spaces) // Duplicate blank names (3 spaces)
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {" ", " "}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {" ", " "}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {" ", " "}, true), Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {" ", " "}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {" ", " "}, true), Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {" ", " "}, true),
// Duplicate null names
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {null, null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {null, null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {null, null}, false),
// Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {null, null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {null, null}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {null, null}, true),
// Duplicate blank names (1+3 spaces)
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {" ", " "}, false),
// Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {" ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {" ", " "}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {" ", " "}, true),
// Duplicate blank names and null names
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {" ", null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {" ", null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {" ", null}, false),
// Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {" ", null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {" ", null}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {" ", null}, true),
// Duplicate non-empty and empty names // Duplicate non-empty and empty names
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A", "", ""}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A", "", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A", "", ""}, false), Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A", "", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", "A", "", ""}, false),
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", "A", "", ""}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", "A", "", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "A", "", ""}, false), Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "A", "", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "A", "", ""}, true), Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "A", "", ""}, true),
@ -94,31 +148,67 @@ public class CSVDuplicateHeaderTest {
// Duplicate non-empty and blank names // Duplicate non-empty and blank names
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A", " ", " "}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A", " ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A", " ", " "}, false), Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A", " ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", "A", " ", " "}, false),
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", "A", " ", " "}, false), Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", "A", " ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "A", " ", " "}, false), Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "A", " ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "A", " ", " "}, true) Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "A", " ", " "}, true),
// Duplicate non-empty and null names
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "A", null, null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "A", null, null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", "A", null, null}, false),
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", "A", null, null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "A", null, null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "A", null, null}, true),
// Duplicate blank names
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", "", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", "", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", "", ""}, false),
Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", "", ""}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", "", ""}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", "", ""}, true),
// Duplicate null names
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", null, null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", null, null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", null, null}, false),
// Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", null, null}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", null, null}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", null, null}, true),
// Duplicate blank names (1+3 spaces)
Arguments.of(DuplicateHeaderMode.DISALLOW, false, new String[] {"A", " ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] {"A", " ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] {"A", " ", " "}, false),
// Arguments.of(DuplicateHeaderMode.DISALLOW, true, new String[] {"A", " ", " "}, false),
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] {"A", " ", " "}, true),
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, true, new String[] {"A", " ", " "}, true)
); );
} }
static Stream<Arguments> duplicateHeaderParseOnlyData() { /**
return Stream.of( * Return test cases for duplicate header data for use in CSVFormat.
// Duplicate empty names * <p>
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, false, new String[] { "", "" }, false), * This filters the parsing test data to all cases where the allow missing column
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] { "", "" }, false), * names flag is true. The allow missing column names is exclusively for parsing.
* CSVFormat validation applies to both parsing and writing and thus validation
// Duplicate blank names (1 space) * is less strict and behaves as if the missing column names constraint is absent.
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] { " ", " " }, false), * The filtered data is then returned with the missing column names flag set to both
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] { " ", " " }, true), * true and false for each test case.
* </p>
// Duplicate blank names (3 spaces) *
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] { " ", " " }, false), * @return the stream of arguments
Arguments.of(DuplicateHeaderMode.ALLOW_EMPTY, true, new String[] { " ", " " }, true), */
static Stream<Arguments> duplicateHeaderAllowsMissingColumnsNamesData() {
// Duplicate non-empty and empty names return duplicateHeaderData()
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] { "A", "A", "", "" }, false), .filter(arg -> Boolean.TRUE.equals(arg.get()[1]))
.flatMap(arg -> {
// Duplicate non-empty and blank names // Return test case with flag as both true and false
Arguments.of(DuplicateHeaderMode.ALLOW_ALL, false, new String[] { "A", "A", " ", " " }, false)); final Object[] data = arg.get().clone();
data[1] = Boolean.FALSE;
return Stream.of(arg, Arguments.of(data));
});
} }
/** /**
@ -130,12 +220,13 @@ public class CSVDuplicateHeaderTest {
* @param valid true if the settings are expected to be valid, otherwise expect a IllegalArgumentException * @param valid true if the settings are expected to be valid, otherwise expect a IllegalArgumentException
*/ */
@ParameterizedTest @ParameterizedTest
@MethodSource(value = {"duplicateHeaderData"}) @MethodSource(value = {"duplicateHeaderAllowsMissingColumnsNamesData"})
public void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode, public void testCSVFormat(final DuplicateHeaderMode duplicateHeaderMode,
final boolean allowMissingColumnNames, final boolean allowMissingColumnNames,
final String[] headers, final String[] headers,
final boolean valid) { final boolean valid) {
final CSVFormat.Builder builder = CSVFormat.DEFAULT.builder() final CSVFormat.Builder builder =
CSVFormat.DEFAULT.builder()
.setDuplicateHeaderMode(duplicateHeaderMode) .setDuplicateHeaderMode(duplicateHeaderMode)
.setAllowMissingColumnNames(allowMissingColumnNames) .setAllowMissingColumnNames(allowMissingColumnNames)
.setHeader(headers); .setHeader(headers);
@ -159,20 +250,29 @@ public class CSVDuplicateHeaderTest {
* @throws IOException Signals that an I/O exception has occurred. * @throws IOException Signals that an I/O exception has occurred.
*/ */
@ParameterizedTest @ParameterizedTest
@MethodSource(value = {"duplicateHeaderData", "duplicateHeaderParseOnlyData"}) @MethodSource(value = {"duplicateHeaderData"})
public void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode, public void testCSVParser(final DuplicateHeaderMode duplicateHeaderMode,
final boolean allowMissingColumnNames, final boolean allowMissingColumnNames,
final String[] headers, final String[] headers,
final boolean valid) throws IOException { final boolean valid) throws IOException {
final CSVFormat format = CSVFormat.DEFAULT.builder() final CSVFormat format =
CSVFormat.DEFAULT.builder()
.setDuplicateHeaderMode(duplicateHeaderMode) .setDuplicateHeaderMode(duplicateHeaderMode)
.setAllowMissingColumnNames(allowMissingColumnNames) .setAllowMissingColumnNames(allowMissingColumnNames)
.setNullString("NULL")
.setHeader() .setHeader()
.build(); .build();
final String input = Arrays.stream(headers).collect(Collectors.joining(format.getDelimiterString())); final String input = Arrays.stream(headers)
.map(s -> s == null ? "NULL" : s)
.collect(Collectors.joining(format.getDelimiterString()));
if (valid) { if (valid) {
try(CSVParser parser = CSVParser.parse(input, format)) { try(CSVParser parser = CSVParser.parse(input, format)) {
Assertions.assertEquals(Arrays.asList(headers), parser.getHeaderNames()); // Parser ignores null headers
final List<String> expected =
Arrays.stream(headers)
.filter(s -> s != null)
.collect(Collectors.toList());
Assertions.assertEquals(expected, parser.getHeaderNames(), "HeaderNames");
} }
} else { } else {
Assertions.assertThrows(IllegalArgumentException.class, () -> CSVParser.parse(input, format)); Assertions.assertThrows(IllegalArgumentException.class, () -> CSVParser.parse(input, format));