From 28441e6a8505cd0e1a34800dd65c1eeacfc5e363 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Sat, 14 Sep 2024 11:43:26 -0400 Subject: [PATCH] Add CSVException that extends IOException thrown on invalid input instead of IOException --- src/changes/changes.xml | 1 + .../org/apache/commons/csv/CSVException.java | 44 ++++++++++++++++ .../org/apache/commons/csv/CSVFormat.java | 1 + .../org/apache/commons/csv/CSVParser.java | 18 ++++++- .../java/org/apache/commons/csv/Lexer.java | 50 ++++++++----------- .../org/apache/commons/csv/CSVParserTest.java | 8 +-- 6 files changed, 89 insertions(+), 33 deletions(-) create mode 100644 src/main/java/org/apache/commons/csv/CSVException.java diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 5b6960a2..efb120ed 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -42,6 +42,7 @@ + Add CSVException that extends IOException thrown on invalid input instead of IOException. Fix PMD issues for port to PMD 7.1.0. Fix some Javadoc links #442. diff --git a/src/main/java/org/apache/commons/csv/CSVException.java b/src/main/java/org/apache/commons/csv/CSVException.java new file mode 100644 index 00000000..79e48823 --- /dev/null +++ b/src/main/java/org/apache/commons/csv/CSVException.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.csv; + +import java.io.IOException; +import java.util.Formatter; +import java.util.IllegalFormatException; + +/** + * Signals a CSV exception. For example, this exception is thrown when parsing invalid input. + * + * @since 1.12.0 + */ +public class CSVException extends IOException { + + private static final long serialVersionUID = 1L; + + /** + * Constructs a new instance with a formatted message. + * + * @param format A {@link Formatter} format string. + * @param args See {@link String#format(String, Object...)}. + * @throws IllegalFormatException See {@link String#format(String, Object...)}. + */ + public CSVException(final String format, final Object... args) { + super(String.format(format, args)); + } + +} diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index eeffd19b..b9499464 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -2032,6 +2032,7 @@ public final class CSVFormat implements Serializable { * @param reader the input stream * @return a parser over a stream of {@link CSVRecord}s. * @throws IOException If an I/O error occurs + * @throws CSVException Thrown on invalid input. */ public CSVParser parse(final Reader reader) throws IOException { return new CSVParser(reader, this); diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index ac33f6b7..626af387 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -145,6 +145,11 @@ public final class CSVParser implements Iterable, Closeable { final class CSVRecordIterator implements Iterator { private CSVRecord current; + /** + * Gets the next record. + * + * @return the next record. + */ private CSVRecord getNextRecord() { return Uncheck.get(CSVParser.this::nextRecord); } @@ -221,6 +226,7 @@ public final class CSVParser implements Iterable, Closeable { * If the parameters of the format are inconsistent or if either file or format are null. * @throws IOException * If an I/O error occurs + * @throws CSVException Thrown on invalid input. */ public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException { Objects.requireNonNull(file, "file"); @@ -246,6 +252,7 @@ public final class CSVParser implements Iterable, Closeable { * If the parameters of the format are inconsistent or if either reader or format are null. * @throws IOException * If there is a problem reading the header or skipping the first record + * @throws CSVException Thrown on invalid input. * @since 1.5 */ @SuppressWarnings("resource") @@ -270,6 +277,7 @@ public final class CSVParser implements Iterable, Closeable { * If the parameters of the format are inconsistent or if either file or format are null. * @throws IOException * If an I/O error occurs + * @throws CSVException Thrown on invalid input. * @since 1.5 */ @SuppressWarnings("resource") @@ -296,6 +304,7 @@ public final class CSVParser implements Iterable, Closeable { * If the parameters of the format are inconsistent or if either reader or format are null. * @throws IOException * If there is a problem reading the header or skipping the first record + * @throws CSVException Thrown on invalid input. * @since 1.5 */ public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException { @@ -314,6 +323,7 @@ public final class CSVParser implements Iterable, Closeable { * If the parameters of the format are inconsistent or if either string or format are null. * @throws IOException * If an I/O error occurs + * @throws CSVException Thrown on invalid input. */ public static CSVParser parse(final String string, final CSVFormat format) throws IOException { Objects.requireNonNull(string, "string"); @@ -341,6 +351,7 @@ public final class CSVParser implements Iterable, Closeable { * If the parameters of the format are inconsistent or if either url, charset or format are null. * @throws IOException * If an I/O error occurs + * @throws CSVException Thrown on invalid input. */ @SuppressWarnings("resource") public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException { @@ -395,6 +406,7 @@ public final class CSVParser implements Iterable, Closeable { * If the parameters of the format are inconsistent or if either reader or format are null. * @throws IOException * If there is a problem reading the header or skipping the first record + * @throws CSVException Thrown on invalid input. */ public CSVParser(final Reader reader, final CSVFormat format) throws IOException { this(reader, format, 0, 1); @@ -420,6 +432,7 @@ public final class CSVParser implements Iterable, Closeable { * If the parameters of the format are inconsistent or if either the reader or format is null. * @throws IOException * If there is a problem reading the header or skipping the first record + * @throws CSVException Thrown on invalid input. * @since 1.1 */ @SuppressWarnings("resource") @@ -465,6 +478,7 @@ public final class CSVParser implements Iterable, Closeable { * * @return null if the format has no header. * @throws IOException if there is a problem reading the header or skipping the first record + * @throws CSVException Thrown on invalid input. */ private Headers createHeaders() throws IOException { Map hdrMap = null; @@ -746,8 +760,8 @@ public final class CSVParser implements Iterable, Closeable { * Parses the next record from the current point in the stream. * * @return the record as an array of values, or {@code null} if the end of the stream has been reached - * @throws IOException - * on parse error or input read-failure + * @throws IOException on parse error or input read-failure + * @throws CSVException Thrown on invalid input. */ CSVRecord nextRecord() throws IOException { CSVRecord result = null; diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index b5d9e29f..e2aec718 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -207,10 +207,10 @@ final class Lexer implements Closeable { * A token corresponds to a term, a record change or an end-of-file indicator. *

* - * @param token - * an existing Token object to reuse. The caller is responsible for initializing the Token. + * @param token an existing Token object to reuse. The caller is responsible for initializing the Token. * @return the next token found. - * @throws IOException on stream access error. + * @throws IOException on stream access error. + * @throws CSVException Thrown on invalid input. */ Token nextToken(final Token token) throws IOException { // Get the last read char (required for empty line detection) @@ -307,6 +307,7 @@ final class Lexer implements Closeable { * @throws IOException * Thrown when in an invalid state: EOF before closing encapsulator or invalid character before * delimiter or EOL. + * @throws CSVException Thrown on invalid input. */ private Token parseEncapsulatedToken(final Token token) throws IOException { token.isQuoted = true; @@ -342,8 +343,8 @@ final class Lexer implements Closeable { token.content.append((char) c); } else if (!Character.isWhitespace((char) c)) { // error invalid char between token and next delimiter - throw new IOException(String.format("Invalid char between encapsulated token and delimiter at line: %,d, position: %,d", - getCurrentLineNumber(), getCharacterPosition())); + throw new CSVException("Invalid character between encapsulated token and delimiter at line: %,d, position: %,d", + getCurrentLineNumber(), getCharacterPosition()); } } } @@ -356,8 +357,7 @@ final class Lexer implements Closeable { return token; } // error condition (end of file before end of token) - throw new IOException("(startline " + startLineNumber + - ") EOF reached before encapsulated token finished"); + throw new CSVException("(startline %,d) EOF reached before encapsulated token finished", startLineNumber); } else { // consume character token.content.append((char) c); @@ -368,8 +368,8 @@ final class Lexer implements Closeable { /** * Parses a simple token. *

- * Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped - * delimiters (as \, or \;). The token is finished when one of the following conditions becomes true: + * Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped delimiters (as \, or \;). The token is finished + * when one of the following conditions becomes true: *

*
    *
  • The end of line has been reached (EORECORD)
  • @@ -377,13 +377,11 @@ final class Lexer implements Closeable { *
  • An unescaped delimiter has been reached (TOKEN)
  • *
* - * @param token - * the current token - * @param ch - * the current character + * @param token the current token + * @param ch the current character * @return the filled token - * @throws IOException - * on stream access error + * @throws IOException on stream access error + * @throws CSVException Thrown on invalid input. */ private Token parseSimpleToken(final Token token, int ch) throws IOException { // Faster to use while(true)+break than while(token.type == INVALID) @@ -420,10 +418,9 @@ final class Lexer implements Closeable { /** * Appends the next escaped character to the token's content. * - * @param token - * the current token - * @throws IOException - * on stream access error + * @param token the current token + * @throws IOException on stream access error + * @throws CSVException Thrown on invalid input. */ private void appendNextEscapedCharacterToToken(final Token token) throws IOException { if (isEscapeDelimiter()) { @@ -467,15 +464,12 @@ final class Lexer implements Closeable { // TODO escape handling needs more work /** - * Handle an escape sequence. - * The current character must be the escape character. - * On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()} - * on the input stream. + * Handle an escape sequence. The current character must be the escape character. On return, the next character is available by calling + * {@link ExtendedBufferedReader#getLastChar()} on the input stream. * - * @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is - * invalid. - * @throws IOException if there is a problem reading the stream or the end of stream is detected: - * the escape character is not allowed at end of stream + * @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is invalid. + * @throws IOException if there is a problem reading the stream or the end of stream is detected: the escape character is not allowed at end of stream + * @throws CSVException Thrown on invalid input. */ int readEscape() throws IOException { // the escape char has just been read (normally a backslash) @@ -498,7 +492,7 @@ final class Lexer implements Closeable { case Constants.BACKSPACE: // TODO is this correct? return ch; case EOF: - throw new IOException("EOF whilst processing escape sequence"); + throw new CSVException("EOF while processing escape sequence"); default: // Now check for meta-characters if (isMetaChar(ch)) { diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 6b2e2594..4cbbf8e5 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -27,6 +27,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; import java.io.File; import java.io.IOException; @@ -1555,10 +1556,11 @@ public class CSVParserTest { .setSkipHeaderRecord(true) .build(); // @formatter:on - try (CSVParser csvParser = csvFormat.parse(stringReader)) { - final Exception exception = assertThrows(UncheckedIOException.class, csvParser::getRecords); - assertTrue(exception.getMessage().contains("Invalid char between encapsulated token and delimiter at line: 2, position: 94")); + final UncheckedIOException exception = assertThrows(UncheckedIOException.class, csvParser::getRecords); + assertInstanceOf(CSVException.class, exception.getCause()); + assertTrue(exception.getMessage().contains("Invalid character between encapsulated token and delimiter at line: 2, position: 94"), + exception::getMessage); } }