[CSV-93] Allow the handling of NULL values.
- [CSV-253] Handle absent values in input (null). - Cleaned up version of PR 77 from dota17 where: - Don't duplicate two state items from the format. - Use try-with-resources. - Remove useless parens. - Update Javaodc. - Sort members in the new tests. - Use builder.
This commit is contained in:
parent
4083b7a2a0
commit
ac280e705d
|
@ -446,8 +446,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
if (lastRecord && inputClean.isEmpty() && this.format.getTrailingDelimiter()) {
|
||||
return;
|
||||
}
|
||||
final String nullString = this.format.getNullString();
|
||||
this.recordList.add(inputClean.equals(nullString) ? null : inputClean);
|
||||
this.recordList.add(handleNull(inputClean));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -636,7 +635,26 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Gets whether this parser is closed.
|
||||
* Handle whether input is parsed as null
|
||||
*
|
||||
* @param input
|
||||
* the cell data to further processed
|
||||
* @return null if input is parsed as null, or input itself if input isn't parsed as null
|
||||
*/
|
||||
private String handleNull(final String input) {
|
||||
final boolean isQuoted = this.reusableToken.isQuoted;
|
||||
final String nullString = format.getNullString();
|
||||
final boolean strictQuoteMode = isStrictQuoteMode();
|
||||
if (input.equals(nullString)) {
|
||||
// nullString = NULL(String), distinguish between "NULL" and NULL in ALL_NON_NULL or NON_NUMERIC quote mode
|
||||
return strictQuoteMode && isQuoted ? input : null;
|
||||
}
|
||||
// don't set nullString, distinguish between "" and ,, (absent values) in All_NON_NULL or NON_NUMERIC quote mode
|
||||
return strictQuoteMode && nullString == null && input.isEmpty() && !isQuoted ? null : input;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests whether this parser is closed.
|
||||
*
|
||||
* @return whether this parser is closed.
|
||||
*/
|
||||
|
@ -645,7 +663,18 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator on the records.
|
||||
* Tests whether the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or {@link QuoteMode#NON_NUMERIC}.
|
||||
*
|
||||
* @return true if the format's {@link QuoteMode} is {@link QuoteMode#ALL_NON_NULL} or
|
||||
* {@link QuoteMode#NON_NUMERIC}.
|
||||
*/
|
||||
private boolean isStrictQuoteMode() {
|
||||
return this.format.getQuoteMode() == QuoteMode.ALL_NON_NULL ||
|
||||
this.format.getQuoteMode() == QuoteMode.NON_NUMERIC;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the record iterator.
|
||||
*
|
||||
* <p>
|
||||
* An {@link IOException} caught during the iteration are re-thrown as an
|
||||
|
|
|
@ -324,6 +324,7 @@ final class Lexer implements Closeable {
|
|||
* on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL
|
||||
*/
|
||||
private Token parseEncapsulatedToken(final Token token) throws IOException {
|
||||
token.isQuoted = true;
|
||||
// save current line number in case needed for IOE
|
||||
final long startLineNumber = getCurrentLineNumber();
|
||||
int c;
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
package org.apache.commons.csv;
|
||||
|
||||
/**
|
||||
* Defines quoting behavior when printing.
|
||||
* Defines quoting behavior.
|
||||
*/
|
||||
public enum QuoteMode {
|
||||
|
||||
|
|
|
@ -55,10 +55,13 @@ final class Token {
|
|||
/** Token ready flag: indicates a valid token with content (ready for the parser). */
|
||||
boolean isReady;
|
||||
|
||||
boolean isQuoted;
|
||||
|
||||
void reset() {
|
||||
content.setLength(0);
|
||||
type = INVALID;
|
||||
isReady = false;
|
||||
isQuoted = false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -873,7 +873,7 @@ public class CSVPrinterTest {
|
|||
String expected = "\"NULL\"\tNULL\n";
|
||||
assertEquals(expected, writer.toString());
|
||||
String[] record0 = toFirstRecordValues(expected, format);
|
||||
assertArrayEquals(new Object[2], record0);
|
||||
assertArrayEquals(s, record0);
|
||||
|
||||
s = new String[] { "\\N", null };
|
||||
format = CSVFormat.MYSQL.withNullString("\\N");
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.csv.issues;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVParser;
|
||||
import org.apache.commons.csv.CSVRecord;
|
||||
import org.apache.commons.csv.QuoteMode;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* Setting QuoteMode:ALL_NON_NULL or NON_NUMERIC can distinguish between empty string columns and absent value columns.
|
||||
*/
|
||||
public class JiraCsv253Test {
|
||||
|
||||
private void assertArrayEqual(final String[] expected, final CSVRecord actual) {
|
||||
for (int i = 0; i < expected.length; i++) {
|
||||
assertEquals(expected[i], actual.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHandleAbsentValues() throws IOException {
|
||||
final String source = "\"John\",,\"Doe\"\n" + ",\"AA\",123\n" + "\"John\",90,\n" + "\"\",,90";
|
||||
final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).build();
|
||||
try (final CSVParser parser = csvFormat.parse(new StringReader(source))) {
|
||||
final Iterator<CSVRecord> csvRecords = parser.iterator();
|
||||
assertArrayEqual(new String[] {"John", null, "Doe"}, csvRecords.next());
|
||||
assertArrayEqual(new String[] {null, "AA", "123"}, csvRecords.next());
|
||||
assertArrayEqual(new String[] {"John", "90", null}, csvRecords.next());
|
||||
assertArrayEqual(new String[] {"", null, "90"}, csvRecords.next());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,150 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.csv.issues;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.commons.csv.CSVFormat;
|
||||
import org.apache.commons.csv.CSVParser;
|
||||
import org.apache.commons.csv.CSVRecord;
|
||||
import org.apache.commons.csv.QuoteMode;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* Add more tests about null value.
|
||||
* <p>
|
||||
* QuoteMode:ALL_NON_NULL (Quotes all non-null fields, null will not be quoted but not null will be quoted). when
|
||||
* withNullString("NULL"), NULL String value ("NULL") and null value (null) will be formatted as '"NULL",NULL'. So it
|
||||
* also should be parsed as NULL String value and null value (["NULL", null]), It should be distinguish in parsing. And
|
||||
* when don't set nullString in CSVFormat, String '"",' should be parsed as "" and null (["", null]) according to null
|
||||
* will not be quoted but not null will be quoted. QuoteMode:NON_NUMERIC, same as ALL_NON_NULL.
|
||||
* </p>
|
||||
* <p>
|
||||
* This can solve the problem of distinguishing between empty string columns and absent value columns which just like
|
||||
* Jira CSV-253 to a certain extent.
|
||||
* </p>
|
||||
*/
|
||||
public class JiraCsv93Test {
|
||||
private static Object[] objects1 = {"abc", "", null, "a,b,c", 123};
|
||||
|
||||
private static Object[] objects2 = {"abc", "NULL", null, "a,b,c", 123};
|
||||
|
||||
private void every(final CSVFormat csvFormat, final Object[] objects, final String format, final String[] data)
|
||||
throws IOException {
|
||||
final String source = csvFormat.format(objects);
|
||||
assertEquals(format, csvFormat.format(objects));
|
||||
try (final CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
|
||||
final CSVRecord csvRecord = csvParser.iterator().next();
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
assertEquals(csvRecord.get(i), data[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithNotSetNullString() throws IOException {
|
||||
// @formatter:off
|
||||
every(CSVFormat.DEFAULT,
|
||||
objects1,
|
||||
"abc,,,\"a,b,c\",123",
|
||||
new String[]{"abc", "", "", "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.ALL).build(),
|
||||
objects1,
|
||||
"\"abc\",\"\",,\"a,b,c\",\"123\"",
|
||||
new String[]{"abc", "", "", "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.ALL_NON_NULL).build(),
|
||||
objects1,
|
||||
"\"abc\",\"\",,\"a,b,c\",\"123\"",
|
||||
new String[]{"abc", "", null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.MINIMAL).build(),
|
||||
objects1,
|
||||
"abc,,,\"a,b,c\",123",
|
||||
new String[]{"abc", "", "", "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setEscape('?').setQuoteMode(QuoteMode.NONE).build(),
|
||||
objects1,
|
||||
"abc,,,a?,b?,c,123",
|
||||
new String[]{"abc", "", "", "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setQuoteMode(QuoteMode.NON_NUMERIC).build(),
|
||||
objects1,
|
||||
"\"abc\",\"\",,\"a,b,c\",123",
|
||||
new String[]{"abc", "", null, "a,b,c", "123"});
|
||||
// @formatter:on
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithSetNullStringEmptyString() throws IOException {
|
||||
// @formatter:off
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("").build(),
|
||||
objects1,
|
||||
"abc,,,\"a,b,c\",123",
|
||||
new String[]{"abc", null, null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL).build(),
|
||||
objects1,
|
||||
"\"abc\",\"\",\"\",\"a,b,c\",\"123\"",
|
||||
new String[]{"abc", null, null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL_NON_NULL).build(),
|
||||
objects1,
|
||||
"\"abc\",\"\",,\"a,b,c\",\"123\"",
|
||||
new String[]{"abc", "", null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.MINIMAL).build(),
|
||||
objects1,
|
||||
"abc,,,\"a,b,c\",123",
|
||||
new String[]{"abc", null, null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("").setEscape('?').setQuoteMode(QuoteMode.NONE).build(),
|
||||
objects1,
|
||||
"abc,,,a?,b?,c,123",
|
||||
new String[]{"abc", null, null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.NON_NUMERIC).build(),
|
||||
objects1,
|
||||
"\"abc\",\"\",,\"a,b,c\",123",
|
||||
new String[]{"abc", "", null, "a,b,c", "123"});
|
||||
// @formatter:on
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithSetNullStringNULL() throws IOException {
|
||||
// @formatter:off
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("NULL").build(),
|
||||
objects2,
|
||||
"abc,NULL,NULL,\"a,b,c\",123",
|
||||
new String[]{"abc", null, null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.ALL).build(),
|
||||
objects2,
|
||||
"\"abc\",\"NULL\",\"NULL\",\"a,b,c\",\"123\"",
|
||||
new String[]{"abc", null, null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.ALL_NON_NULL).build(),
|
||||
objects2,
|
||||
"\"abc\",\"NULL\",NULL,\"a,b,c\",\"123\"",
|
||||
new String[]{"abc", "NULL", null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.MINIMAL).build(),
|
||||
objects2,
|
||||
"abc,NULL,NULL,\"a,b,c\",123",
|
||||
new String[]{"abc", null, null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setEscape('?').setQuoteMode(QuoteMode.NONE).build(),
|
||||
objects2,
|
||||
"abc,NULL,NULL,a?,b?,c,123",
|
||||
new String[]{"abc", null, null, "a,b,c", "123"});
|
||||
every(CSVFormat.DEFAULT.builder().setNullString("NULL").setQuoteMode(QuoteMode.NON_NUMERIC).build(),
|
||||
objects2,
|
||||
"\"abc\",\"NULL\",NULL,\"a,b,c\",123",
|
||||
new String[]{"abc", "NULL", null, "a,b,c", "123"});
|
||||
// @formatter:on
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue