From 28441e6a8505cd0e1a34800dd65c1eeacfc5e363 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Sat, 14 Sep 2024 11:43:26 -0400
Subject: [PATCH] Add CSVException that extends IOException thrown on invalid
input instead of IOException
---
src/changes/changes.xml | 1 +
.../org/apache/commons/csv/CSVException.java | 44 ++++++++++++++++
.../org/apache/commons/csv/CSVFormat.java | 1 +
.../org/apache/commons/csv/CSVParser.java | 18 ++++++-
.../java/org/apache/commons/csv/Lexer.java | 50 ++++++++-----------
.../org/apache/commons/csv/CSVParserTest.java | 8 +--
6 files changed, 89 insertions(+), 33 deletions(-)
create mode 100644 src/main/java/org/apache/commons/csv/CSVException.java
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 5b6960a2..efb120ed 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -42,6 +42,7 @@
+ Add CSVException that extends IOException thrown on invalid input instead of IOException.
Fix PMD issues for port to PMD 7.1.0.
Fix some Javadoc links #442.
diff --git a/src/main/java/org/apache/commons/csv/CSVException.java b/src/main/java/org/apache/commons/csv/CSVException.java
new file mode 100644
index 00000000..79e48823
--- /dev/null
+++ b/src/main/java/org/apache/commons/csv/CSVException.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+import java.io.IOException;
+import java.util.Formatter;
+import java.util.IllegalFormatException;
+
+/**
+ * Signals a CSV exception, for example invalid input found while parsing. Existing {@link IOException} handlers also catch it.
+ *
+ * @since 1.12.0
+ */
+public class CSVException extends IOException {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Constructs a new instance whose message is the result of {@code String.format(format, args)}.
+ *
+ * @param format A {@link Formatter} format string used to build the exception message.
+ * @param args The arguments referenced by the format specifiers, see {@link String#format(String, Object...)}.
+ * @throws IllegalFormatException If the format string is invalid for the given arguments, see {@link String#format(String, Object...)}.
+ */
+ public CSVException(final String format, final Object... args) {
+ super(String.format(format, args));
+ }
+
+}
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index eeffd19b..b9499464 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -2032,6 +2032,7 @@ public final class CSVFormat implements Serializable {
* @param reader the input stream
* @return a parser over a stream of {@link CSVRecord}s.
* @throws IOException If an I/O error occurs
+ * @throws CSVException Thrown on invalid input.
*/
public CSVParser parse(final Reader reader) throws IOException {
return new CSVParser(reader, this);
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index ac33f6b7..626af387 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -145,6 +145,11 @@ public final class CSVParser implements Iterable, Closeable {
final class CSVRecordIterator implements Iterator {
private CSVRecord current;
+ /**
+ * Gets the next record, rethrowing any IOException (including CSVException) from the parser as an UncheckedIOException.
+ *
+ * @return the next record.
+ */
private CSVRecord getNextRecord() {
return Uncheck.get(CSVParser.this::nextRecord);
}
@@ -221,6 +226,7 @@ public final class CSVParser implements Iterable, Closeable {
* If the parameters of the format are inconsistent or if either file or format are null.
* @throws IOException
* If an I/O error occurs
+ * @throws CSVException Thrown on invalid input.
*/
public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
Objects.requireNonNull(file, "file");
@@ -246,6 +252,7 @@ public final class CSVParser implements Iterable, Closeable {
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
+ * @throws CSVException Thrown on invalid input.
* @since 1.5
*/
@SuppressWarnings("resource")
@@ -270,6 +277,7 @@ public final class CSVParser implements Iterable, Closeable {
* If the parameters of the format are inconsistent or if either file or format are null.
* @throws IOException
* If an I/O error occurs
+ * @throws CSVException Thrown on invalid input.
* @since 1.5
*/
@SuppressWarnings("resource")
@@ -296,6 +304,7 @@ public final class CSVParser implements Iterable, Closeable {
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
+ * @throws CSVException Thrown on invalid input.
* @since 1.5
*/
public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
@@ -314,6 +323,7 @@ public final class CSVParser implements Iterable, Closeable {
* If the parameters of the format are inconsistent or if either string or format are null.
* @throws IOException
* If an I/O error occurs
+ * @throws CSVException Thrown on invalid input.
*/
public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
Objects.requireNonNull(string, "string");
@@ -341,6 +351,7 @@ public final class CSVParser implements Iterable, Closeable {
* If the parameters of the format are inconsistent or if either url, charset or format are null.
* @throws IOException
* If an I/O error occurs
+ * @throws CSVException Thrown on invalid input.
*/
@SuppressWarnings("resource")
public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
@@ -395,6 +406,7 @@ public final class CSVParser implements Iterable, Closeable {
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
+ * @throws CSVException Thrown on invalid input.
*/
public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
this(reader, format, 0, 1);
@@ -420,6 +432,7 @@ public final class CSVParser implements Iterable, Closeable {
* If the parameters of the format are inconsistent or if either the reader or format is null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
+ * @throws CSVException Thrown on invalid input.
* @since 1.1
*/
@SuppressWarnings("resource")
@@ -465,6 +478,7 @@ public final class CSVParser implements Iterable, Closeable {
*
* @return null if the format has no header.
* @throws IOException if there is a problem reading the header or skipping the first record
+ * @throws CSVException Thrown on invalid input.
*/
private Headers createHeaders() throws IOException {
Map hdrMap = null;
@@ -746,8 +760,8 @@ public final class CSVParser implements Iterable, Closeable {
* Parses the next record from the current point in the stream.
*
* @return the record as an array of values, or {@code null} if the end of the stream has been reached
- * @throws IOException
- * on parse error or input read-failure
+ * @throws IOException on parse error or input read-failure
+ * @throws CSVException Thrown on invalid input.
*/
CSVRecord nextRecord() throws IOException {
CSVRecord result = null;
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index b5d9e29f..e2aec718 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -207,10 +207,10 @@ final class Lexer implements Closeable {
* A token corresponds to a term, a record change or an end-of-file indicator.
*
*
- * @param token
- * an existing Token object to reuse. The caller is responsible for initializing the Token.
+ * @param token an existing Token object to reuse. The caller is responsible for initializing the Token.
* @return the next token found.
- * @throws IOException on stream access error.
+ * @throws IOException on stream access error.
+ * @throws CSVException Thrown on invalid input.
*/
Token nextToken(final Token token) throws IOException {
// Get the last read char (required for empty line detection)
@@ -307,6 +307,7 @@ final class Lexer implements Closeable {
* @throws IOException
* Thrown when in an invalid state: EOF before closing encapsulator or invalid character before
* delimiter or EOL.
+ * @throws CSVException Thrown on invalid input: EOF before the closing encapsulator, or an invalid character before a delimiter or EOL.
*/
private Token parseEncapsulatedToken(final Token token) throws IOException {
token.isQuoted = true;
@@ -342,8 +343,8 @@ final class Lexer implements Closeable {
token.content.append((char) c);
} else if (!Character.isWhitespace((char) c)) {
// error invalid char between token and next delimiter
- throw new IOException(String.format("Invalid char between encapsulated token and delimiter at line: %,d, position: %,d",
- getCurrentLineNumber(), getCharacterPosition()));
+ throw new CSVException("Invalid character between encapsulated token and delimiter at line: %,d, position: %,d",
+ getCurrentLineNumber(), getCharacterPosition());
}
}
}
@@ -356,8 +357,7 @@ final class Lexer implements Closeable {
return token;
}
// error condition (end of file before end of token)
- throw new IOException("(startline " + startLineNumber +
- ") EOF reached before encapsulated token finished");
+ throw new CSVException("(startline %,d) EOF reached before encapsulated token finished", startLineNumber);
} else {
// consume character
token.content.append((char) c);
@@ -368,8 +368,8 @@ final class Lexer implements Closeable {
/**
* Parses a simple token.
*
- * Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped
- * delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
+ * Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped delimiters (as \, or \;). The token is finished
+ * when one of the following conditions becomes true:
*
*
* - The end of line has been reached (EORECORD)
@@ -377,13 +377,11 @@ final class Lexer implements Closeable {
* - An unescaped delimiter has been reached (TOKEN)
*
*
- * @param token
- * the current token
- * @param ch
- * the current character
+ * @param token the current token
+ * @param ch the current character
* @return the filled token
- * @throws IOException
- * on stream access error
+ * @throws IOException on stream access error
+ * @throws CSVException Thrown on invalid input.
*/
private Token parseSimpleToken(final Token token, int ch) throws IOException {
// Faster to use while(true)+break than while(token.type == INVALID)
@@ -420,10 +418,9 @@ final class Lexer implements Closeable {
/**
* Appends the next escaped character to the token's content.
*
- * @param token
- * the current token
- * @throws IOException
- * on stream access error
+ * @param token the current token
+ * @throws IOException on stream access error
+ * @throws CSVException Thrown on invalid input.
*/
private void appendNextEscapedCharacterToToken(final Token token) throws IOException {
if (isEscapeDelimiter()) {
@@ -467,15 +464,12 @@ final class Lexer implements Closeable {
// TODO escape handling needs more work
/**
- * Handle an escape sequence.
- * The current character must be the escape character.
- * On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()}
- * on the input stream.
+ * Handles an escape sequence. The current character must be the escape character. On return, the next character is available by calling
+ * {@link ExtendedBufferedReader#getLastChar()} on the input stream.
*
- * @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is
- * invalid.
- * @throws IOException if there is a problem reading the stream or the end of stream is detected:
- * the escape character is not allowed at end of stream
+ * @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is invalid.
+ * @throws IOException if there is a problem reading the stream or the end of stream is detected: the escape character is not allowed at end of stream
+ * @throws CSVException Thrown on invalid input: the end of the stream is reached while processing the escape sequence.
*/
int readEscape() throws IOException {
// the escape char has just been read (normally a backslash)
@@ -498,7 +492,7 @@ final class Lexer implements Closeable {
case Constants.BACKSPACE: // TODO is this correct?
return ch;
case EOF:
- throw new IOException("EOF whilst processing escape sequence");
+ throw new CSVException("EOF while processing escape sequence");
default:
// Now check for meta-characters
if (isMetaChar(ch)) {
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 6b2e2594..4cbbf8e5 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -27,6 +27,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import java.io.File;
import java.io.IOException;
@@ -1555,10 +1556,11 @@ public class CSVParserTest {
.setSkipHeaderRecord(true)
.build();
// @formatter:on
-
try (CSVParser csvParser = csvFormat.parse(stringReader)) {
- final Exception exception = assertThrows(UncheckedIOException.class, csvParser::getRecords);
- assertTrue(exception.getMessage().contains("Invalid char between encapsulated token and delimiter at line: 2, position: 94"));
+ final UncheckedIOException exception = assertThrows(UncheckedIOException.class, csvParser::getRecords);
+ assertInstanceOf(CSVException.class, exception.getCause());
+ assertTrue(exception.getMessage().contains("Invalid character between encapsulated token and delimiter at line: 2, position: 94"),
+ exception::getMessage);
}
}