Add more details to the error message when the CSV parser encounters an issue

This commit is contained in:
gbidsilva 2023-08-30 13:21:53 +05:30
parent e1f8da16d7
commit 72ed67b29d
2 changed files with 75 additions and 1 deletions

View File

@ -355,6 +355,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
}
/**
 * Maximum number of most-recently parsed values of the current record that
 * {@code getLastParsedContent()} includes when building the context string for a parse error message.
 */
private int maxParsedTokenCount = 5;
private String headerComment;
private String trailerComment;
@ -440,6 +442,44 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
this.recordNumber = recordNumber - 1;
}
/**
 * Returns the most recently parsed values of the record currently being read, for use in error messages.
 * <p>
 * The values are joined with the format's delimiter string. At most {@code maxParsedTokenCount} values are
 * included; when more values than that have already been parsed, only the last {@code maxParsedTokenCount}
 * are kept and the result is prefixed with {@code "..."} to show that leading values were dropped.
 * </p>
 * <p>
 * Example: if the values parsed so far for the current record are the following, the delimiter is a comma and
 * {@code maxParsedTokenCount} is 5
 * </p>
 * <pre>
 * col1,col2,col3,col4,col5,col6,col7
 * </pre>
 * <p>
 * then this method returns
 * </p>
 * <pre>
 * ...col3,col4,col5,col6,col7
 * </pre>
 *
 * @return the delimiter-joined tail of the values parsed so far for the current record, or an empty string when
 *         no value has been parsed yet
 */
private String getLastParsedContent() {
    final int parsedCount = this.recordList.size();
    if (parsedCount == 0) {
        return "";
    }
    final String delimiter = this.format.getDelimiterString();
    if (parsedCount <= this.maxParsedTokenCount) {
        // Everything fits: join with the delimiter so the values stay distinguishable,
        // exactly as in the truncated case below.
        return String.join(delimiter, this.recordList);
    }
    // More values were parsed than should be reported: keep only the trailing ones.
    final int startIndex = parsedCount - this.maxParsedTokenCount;
    return "..." + String.join(delimiter, this.recordList.subList(startIndex, parsedCount));
}
private void addRecordValue(final boolean lastRecord) {
final String input = this.format.trim(this.reusableToken.content.toString());
if (lastRecord && input.isEmpty() && this.format.getTrailingDelimiter()) {
@ -767,7 +807,15 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset;
do {
this.reusableToken.reset();
this.lexer.nextToken(this.reusableToken);
// https://issues.apache.org/jira/browse/CSV-147
try{
this.lexer.nextToken(this.reusableToken);
} catch (IOException ioe){
String errorMessage = "An error occurred while tying to parse the CSV content. Error in line: "
+ this.lexer.getCurrentLineNumber() + ", position: " + this.lexer.getCharacterPosition()
+ ", last parsed content: " + this.getLastParsedContent();
throw new IOException(errorMessage, ioe);
}
switch (this.reusableToken.type) {
case TOKEN:
this.addRecordValue(false);

View File

@ -1642,4 +1642,30 @@ public class CSVParserTest {
parser.close();
}
/**
 * Verifies that iterating over CSV content with a malformed quoted value fails with an
 * {@link UncheckedIOException} whose message reports the line number, the character position and the
 * last values parsed before the failure.
 * <p>
 * The second input line wraps {@code rec9} in doubled quote characters, which is the value the parser is
 * expected to fail on.
 * </p>
 */
@Test
public void testFaultyCSVShouldThrowErrorWithDetailedMessage() {
    final String csvContent = "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10\n"
        + "rec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,\"\"rec9\"\",rec10";
    final CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
        .setHeader()
        .setSkipHeaderRecord(true)
        .build();
    final Exception exception = assertThrows(UncheckedIOException.class, () -> {
        // try-with-resources so the parser is closed even though iteration is expected to throw.
        try (CSVParser parser = csvFormat.parse(new StringReader(csvContent))) {
            for (final CSVRecord record : parser) {
                // Iterating drives the parsing; the malformed row is expected to fail while it is being read.
                record.size();
            }
        }
    });
    final String expectedErrorMessage = "IOException reading next record: java.io.IOException: An error occurred while tying to parse the CSV content. Error in line: 2, position: 94, last parsed content: ...rec4,rec5,rec6,rec7,rec8";
    final String actualMessage = exception.getMessage();
    assertTrue(actualMessage.contains(expectedErrorMessage));
}
}