From c15a06eee95e227fbe5c61849dd12b7bf126016a Mon Sep 17 00:00:00 2001 From: Angus Date: Sun, 20 Feb 2022 00:53:11 +0800 Subject: [PATCH] CSV-288 Fix for multi-char delimiter not working as expected (#218) When checking if previous token is delimiter, isDelimiter(lastChar) unintentionally advance the buffer pointer. Also isDelimiter(lastChar) cannot handle multi-char delimiter. To fix this, create a new indicator isLastTokenDelimiter instead of using isDelimiter(lastChar), the indicator is set/reset in isDelimiter() --- .../java/org/apache/commons/csv/Lexer.java | 11 +- .../commons/csv/issues/JiraCsv288Test.java | 230 ++++++++++++++++++ 2 files changed, 238 insertions(+), 3 deletions(-) create mode 100644 src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index edd9576c..f424039b 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -62,6 +62,8 @@ final class Lexer implements Closeable { private final ExtendedBufferedReader reader; private String firstEol; + private boolean isLastTokenDelimiter; + Lexer(final CSVFormat format, final ExtendedBufferedReader reader) { this.reader = reader; this.delimiter = format.getDelimiterString().toCharArray(); @@ -124,11 +126,13 @@ final class Lexer implements Closeable { * @throws IOException If an I/O error occurs. */ boolean isDelimiter(final int ch) throws IOException { + isLastTokenDelimiter = false; if (ch != delimiter[0]) { return false; } if (delimiter.length == 1) { - return true; + isLastTokenDelimiter = true; + return true; } reader.lookAhead(delimiterBuf); for (int i = 0; i < delimiterBuf.length; i++) { @@ -137,7 +141,8 @@ final class Lexer implements Closeable { } } final int count = reader.read(delimiterBuf, 0, delimiterBuf.length); - return count != END_OF_STREAM; + isLastTokenDelimiter = count != END_OF_STREAM; + return isLastTokenDelimiter; } /** @@ -243,7 +248,7 @@ final class Lexer implements Closeable { } // did we reach eof during the last iteration already ? EOF - if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) { + if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) { token.type = EOF; // don't set token.isReady here because no content return token; diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java new file mode 100644 index 00000000..354f3051 --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv288Test.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.CSVRecord; +import org.junit.jupiter.api.Test; + +public class JiraCsv288Test { + @Test + // Before fix: + // expected: but was: + public void testParseWithDoublePipeDelimiter() throws Exception { + final Reader in = new StringReader("a||b||c||d||||f"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + public void testParseWithTriplePipeDelimiter() throws Exception { + final Reader in = new StringReader("a|||b|||c|||d||||||f"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|||").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + public void testParseWithABADelimiter() throws Exception { + final Reader in = new StringReader("a|~|b|~|c|~|d|~||~|f"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|~|").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + public void testParseWithDoublePipeDelimiterQuoted() throws Exception { + final Reader in = new StringReader("a||\"b||c\"||d||||f"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b||c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception { + final Reader in = new StringReader("a||b||c||d||||f||"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b,c,d,,f,", stringBuilder.toString()); + } + } + } + + @Test + // Before fix: + // expected: but was: + public void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f~|"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b,c,d,,f,", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + + public void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception { + final Reader in = new StringReader("a||bb||cc||dd||f"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,bb,cc,dd,f", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + public void testParseWithTwoCharDelimiter1() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b,c,d,,f", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + public void testParseWithTwoCharDelimiter2() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f~"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b,c,d,,f~", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + public void testParseWithTwoCharDelimiter3() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f|"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b,c,d,,f|", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + public void testParseWithTwoCharDelimiter4() throws Exception { + final Reader in = new StringReader("a~|b~|c~|d~|~|f~~||g"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b,c,d,,f~,|g", stringBuilder.toString()); + } + } + } + + @Test + // Regression, already passed before fix + public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception { + final Reader in = new StringReader("a|b|c|d||f|"); + StringBuilder stringBuilder = new StringBuilder(); + try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL); + CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|").build())) { + for (CSVRecord csvRecord : csvParser) { + for (int i = 0; i < csvRecord.size(); i++) { + csvPrinter.print(csvRecord.get(i)); + } + assertEquals("a,b,c,d,,f,", stringBuilder.toString()); + } + } + } +} \ No newline at end of file