Add CSVException that extends IOException thrown on invalid input

instead of IOException
This commit is contained in:
Gary Gregory 2024-09-14 11:43:26 -04:00
parent 761a33730c
commit 28441e6a85
6 changed files with 89 additions and 33 deletions

View File

@ -42,6 +42,7 @@
<body>
<release version="1.11.1" date="YYYY-MM-DD" description="Feature and bug fix release (Java 8 or above)">
<!-- ADD -->
<action type="add" dev="ggregory" due-to="Gary Gregory">Add CSVException that extends IOException thrown on invalid input instead of IOException.</action>
<!-- FIX -->
<action type="fix" dev="ggregory" due-to="Gary Gregory">Fix PMD issues for port to PMD 7.1.0.</action>
<action type="fix" dev="ggregory" due-to="Dávid Szigecsán, Gary Gregory">Fix some Javadoc links #442.</action>

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.io.IOException;
import java.util.Formatter;
import java.util.IllegalFormatException;
/**
 * Thrown to report a CSV-specific failure, for instance when the parser encounters invalid input.
 * <p>
 * This type extends {@link IOException}, so code that already catches {@code IOException} also catches this exception.
 * </p>
 *
 * @since 1.12.0
 */
public class CSVException extends IOException {

    private static final long serialVersionUID = 1L;

    /**
     * Renders the exception detail message from a format string and its arguments.
     *
     * @param format A {@link Formatter} format string.
     * @param args   The values referenced by the format specifiers; see {@link String#format(String, Object...)}.
     * @return The formatted message.
     * @throws IllegalFormatException See {@link String#format(String, Object...)}.
     */
    private static String formatMessage(final String format, final Object[] args) {
        return String.format(format, args);
    }

    /**
     * Creates a new exception whose detail message is the result of formatting {@code format} with {@code args}.
     *
     * @param format A {@link Formatter} format string.
     * @param args   See {@link String#format(String, Object...)}.
     * @throws IllegalFormatException See {@link String#format(String, Object...)}.
     */
    public CSVException(final String format, final Object... args) {
        super(formatMessage(format, args));
    }
}

View File

@ -2032,6 +2032,7 @@ public final class CSVFormat implements Serializable {
* @param reader the input stream
* @return a parser over a stream of {@link CSVRecord}s.
* @throws IOException If an I/O error occurs
* @throws CSVException Thrown on invalid input.
*/
public CSVParser parse(final Reader reader) throws IOException {
return new CSVParser(reader, this);

View File

@ -145,6 +145,11 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
final class CSVRecordIterator implements Iterator<CSVRecord> {
private CSVRecord current;
/**
 * Fetches the next record from the enclosing parser by delegating to {@code CSVParser.nextRecord()}.
 * <p>
 * The call goes through {@code Uncheck.get}, so this method declares no checked {@code IOException}; a failure in
 * {@code nextRecord()} is presumably rethrown as an unchecked exception.
 * </p>
 *
 * @return the value returned by {@code CSVParser.nextRecord()}.
 */
private CSVRecord getNextRecord() {
    return Uncheck.get(() -> CSVParser.this.nextRecord());
}
@ -221,6 +226,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* If the parameters of the format are inconsistent or if either file or format are null.
* @throws IOException
* If an I/O error occurs
* @throws CSVException Thrown on invalid input.
*/
public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
Objects.requireNonNull(file, "file");
@ -246,6 +252,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @throws CSVException Thrown on invalid input.
* @since 1.5
*/
@SuppressWarnings("resource")
@ -270,6 +277,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* If the parameters of the format are inconsistent or if either file or format are null.
* @throws IOException
* If an I/O error occurs
* @throws CSVException Thrown on invalid input.
* @since 1.5
*/
@SuppressWarnings("resource")
@ -296,6 +304,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @throws CSVException Thrown on invalid input.
* @since 1.5
*/
public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
@ -314,6 +323,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* If the parameters of the format are inconsistent or if either string or format are null.
* @throws IOException
* If an I/O error occurs
* @throws CSVException Thrown on invalid input.
*/
public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
Objects.requireNonNull(string, "string");
@ -341,6 +351,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* If the parameters of the format are inconsistent or if either url, charset or format are null.
* @throws IOException
* If an I/O error occurs
* @throws CSVException Thrown on invalid input.
*/
@SuppressWarnings("resource")
public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
@ -395,6 +406,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @throws CSVException Thrown on invalid input.
*/
public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
this(reader, format, 0, 1);
@ -420,6 +432,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* If the parameters of the format are inconsistent or if either the reader or format is null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @throws CSVException Thrown on invalid input.
* @since 1.1
*/
@SuppressWarnings("resource")
@ -465,6 +478,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
*
* @return null if the format has no header.
* @throws IOException if there is a problem reading the header or skipping the first record
* @throws CSVException Thrown on invalid input.
*/
private Headers createHeaders() throws IOException {
Map<String, Integer> hdrMap = null;
@ -746,8 +760,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* Parses the next record from the current point in the stream.
*
* @return the record as an array of values, or {@code null} if the end of the stream has been reached
* @throws IOException
* on parse error or input read-failure
* @throws IOException on parse error or input read-failure
* @throws CSVException Thrown on invalid input.
*/
CSVRecord nextRecord() throws IOException {
CSVRecord result = null;

View File

@ -207,10 +207,10 @@ final class Lexer implements Closeable {
* A token corresponds to a term, a record change or an end-of-file indicator.
* </p>
*
* @param token
* an existing Token object to reuse. The caller is responsible for initializing the Token.
* @param token an existing Token object to reuse. The caller is responsible for initializing the Token.
* @return the next token found.
* @throws IOException on stream access error.
* @throws CSVException Thrown on invalid input.
*/
Token nextToken(final Token token) throws IOException {
// Get the last read char (required for empty line detection)
@ -307,6 +307,7 @@ final class Lexer implements Closeable {
* @throws IOException
* on stream access error.
* @throws CSVException Thrown when in an invalid state: EOF before closing encapsulator or invalid character before
* delimiter or EOL.
*/
private Token parseEncapsulatedToken(final Token token) throws IOException {
token.isQuoted = true;
@ -342,8 +343,8 @@ final class Lexer implements Closeable {
token.content.append((char) c);
} else if (!Character.isWhitespace((char) c)) {
// error invalid char between token and next delimiter
throw new IOException(String.format("Invalid char between encapsulated token and delimiter at line: %,d, position: %,d",
getCurrentLineNumber(), getCharacterPosition()));
throw new CSVException("Invalid character between encapsulated token and delimiter at line: %,d, position: %,d",
getCurrentLineNumber(), getCharacterPosition());
}
}
}
@ -356,8 +357,7 @@ final class Lexer implements Closeable {
return token;
}
// error condition (end of file before end of token)
throw new IOException("(startline " + startLineNumber +
") EOF reached before encapsulated token finished");
throw new CSVException("(startline %,d) EOF reached before encapsulated token finished", startLineNumber);
} else {
// consume character
token.content.append((char) c);
@ -368,8 +368,8 @@ final class Lexer implements Closeable {
/**
* Parses a simple token.
* <p>
* Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped
* delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
* Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped delimiters (as \, or \;). The token is finished
* when one of the following conditions becomes true:
* </p>
* <ul>
* <li>The end of line has been reached (EORECORD)</li>
@ -377,13 +377,11 @@ final class Lexer implements Closeable {
* <li>An unescaped delimiter has been reached (TOKEN)</li>
* </ul>
*
* @param token
* the current token
* @param ch
* the current character
* @param token the current token
* @param ch the current character
* @return the filled token
* @throws IOException
* on stream access error
* @throws IOException on stream access error
* @throws CSVException Thrown on invalid input.
*/
private Token parseSimpleToken(final Token token, int ch) throws IOException {
// Faster to use while(true)+break than while(token.type == INVALID)
@ -420,10 +418,9 @@ final class Lexer implements Closeable {
/**
* Appends the next escaped character to the token's content.
*
* @param token
* the current token
* @throws IOException
* on stream access error
* @param token the current token
* @throws IOException on stream access error
* @throws CSVException Thrown on invalid input.
*/
private void appendNextEscapedCharacterToToken(final Token token) throws IOException {
if (isEscapeDelimiter()) {
@ -467,15 +464,12 @@ final class Lexer implements Closeable {
// TODO escape handling needs more work
/**
* Handle an escape sequence.
* The current character must be the escape character.
* On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()}
* on the input stream.
* Handle an escape sequence. The current character must be the escape character. On return, the next character is available by calling
* {@link ExtendedBufferedReader#getLastChar()} on the input stream.
*
* @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is
* invalid.
* @throws IOException if there is a problem reading the stream or the end of stream is detected:
* the escape character is not allowed at end of stream
* @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is invalid.
* @throws IOException if there is a problem reading the stream or the end of stream is detected: the escape character is not allowed at end of stream
* @throws CSVException Thrown on invalid input.
*/
int readEscape() throws IOException {
// the escape char has just been read (normally a backslash)
@ -498,7 +492,7 @@ final class Lexer implements Closeable {
case Constants.BACKSPACE: // TODO is this correct?
return ch;
case EOF:
throw new IOException("EOF whilst processing escape sequence");
throw new CSVException("EOF while processing escape sequence");
default:
// Now check for meta-characters
if (isMetaChar(ch)) {

View File

@ -27,6 +27,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import java.io.File;
import java.io.IOException;
@ -1555,10 +1556,11 @@ public class CSVParserTest {
.setSkipHeaderRecord(true)
.build();
// @formatter:on
try (CSVParser csvParser = csvFormat.parse(stringReader)) {
final Exception exception = assertThrows(UncheckedIOException.class, csvParser::getRecords);
assertTrue(exception.getMessage().contains("Invalid char between encapsulated token and delimiter at line: 2, position: 94"));
final UncheckedIOException exception = assertThrows(UncheckedIOException.class, csvParser::getRecords);
assertInstanceOf(CSVException.class, exception.getCause());
assertTrue(exception.getMessage().contains("Invalid character between encapsulated token and delimiter at line: 2, position: 94"),
exception::getMessage);
}
}