CSV-288 Fix for multi-char delimiter not working as expected (#218)

When checking whether the previous token is a delimiter, isDelimiter(lastChar) unintentionally advances the buffer pointer. In addition, isDelimiter(lastChar) cannot handle a multi-char delimiter. To fix this, introduce a new indicator, isLastTokenDelimiter, and use it instead of isDelimiter(lastChar); the indicator is set/reset in isDelimiter().
This commit is contained in:
Angus 2022-02-20 00:53:11 +08:00 committed by GitHub
parent 94711ebf25
commit c15a06eee9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 238 additions and 3 deletions

View File

@ -62,6 +62,8 @@ final class Lexer implements Closeable {
private final ExtendedBufferedReader reader;
private String firstEol;
private boolean isLastTokenDelimiter;
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
this.reader = reader;
this.delimiter = format.getDelimiterString().toCharArray();
@ -124,11 +126,13 @@ final class Lexer implements Closeable {
* @throws IOException If an I/O error occurs.
*/
boolean isDelimiter(final int ch) throws IOException {
isLastTokenDelimiter = false;
if (ch != delimiter[0]) {
return false;
}
if (delimiter.length == 1) {
return true;
isLastTokenDelimiter = true;
return true;
}
reader.lookAhead(delimiterBuf);
for (int i = 0; i < delimiterBuf.length; i++) {
@ -137,7 +141,8 @@ final class Lexer implements Closeable {
}
}
final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
return count != END_OF_STREAM;
isLastTokenDelimiter = count != END_OF_STREAM;
return isLastTokenDelimiter;
}
/**
@ -243,7 +248,7 @@ final class Lexer implements Closeable {
}
// did we reach eof during the last iteration already ? EOF
if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) {
if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
token.type = EOF;
// don't set token.isReady here because no content
return token;

View File

@ -0,0 +1,230 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv.issues;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.Reader;
import java.io.StringReader;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.junit.jupiter.api.Test;
/**
 * Tests for CSV-288: parsing input whose delimiter is longer than one character.
 *
 * <p>Every test parses a single-line input with a given delimiter string, re-prints the parsed
 * values through a {@link CSVPrinter} configured with {@link CSVFormat#EXCEL}, and compares the
 * printed text with the expected comma-separated form.</p>
 */
public class JiraCsv288Test {

    /**
     * Parses {@code input} using {@code delimiter}, prints every value of every parsed record
     * with {@link CSVFormat#EXCEL}, and asserts that the printed text equals {@code expected}.
     *
     * @param input the raw text to parse
     * @param delimiter the delimiter string handed to the parser's format
     * @param expected the text the printer is expected to have produced
     * @throws Exception if parsing or printing fails
     */
    private static void assertParsedValues(final String input, final String delimiter, final String expected)
            throws Exception {
        final Reader in = new StringReader(input);
        final StringBuilder stringBuilder = new StringBuilder();
        try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
                CSVParser csvParser = CSVParser.parse(in,
                        CSVFormat.Builder.create().setDelimiter(delimiter).build())) {
            for (final CSVRecord csvRecord : csvParser) {
                for (int i = 0; i < csvRecord.size(); i++) {
                    csvPrinter.print(csvRecord.get(i));
                }
            }
            // Assert after the record loop, not inside it: if the parser yields no records at all,
            // the comparison still runs and fails instead of the test passing vacuously.
            assertEquals(expected, stringBuilder.toString());
        }
    }

    // Before fix:
    // expected: <a,b,c,d,,f> but was: <a,b|c,d,|f>
    @Test
    public void testParseWithDoublePipeDelimiter() throws Exception {
        assertParsedValues("a||b||c||d||||f", "||", "a,b,c,d,,f");
    }

    // Before fix:
    // expected: <a,b,c,d,,f> but was: <a,b|c,d,|f>
    @Test
    public void testParseWithTriplePipeDelimiter() throws Exception {
        assertParsedValues("a|||b|||c|||d||||||f", "|||", "a,b,c,d,,f");
    }

    // Before fix:
    // expected: <a,b,c,d,,f> but was: <a,b,c,d,|f>
    @Test
    public void testParseWithABADelimiter() throws Exception {
        assertParsedValues("a|~|b|~|c|~|d|~||~|f", "|~|", "a,b,c,d,,f");
    }

    // Before fix:
    // expected: <a,b||c,d,,f> but was: <a,b||c,d,|f>
    @Test
    public void testParseWithDoublePipeDelimiterQuoted() throws Exception {
        assertParsedValues("a||\"b||c\"||d||||f", "||", "a,b||c,d,,f");
    }

    // Before fix:
    // expected: <a,b,c,d,,f,> but was: <a,b|c,d,|f>
    @Test
    public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception {
        assertParsedValues("a||b||c||d||||f||", "||", "a,b,c,d,,f,");
    }

    // Before fix:
    // expected: <a,b,c,d,,f,> but was: <a,b,c,d,,f>
    @Test
    public void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception {
        assertParsedValues("a~|b~|c~|d~|~|f~|", "~|", "a,b,c,d,,f,");
    }

    // Regression, already passed before fix
    @Test
    public void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception {
        assertParsedValues("a||bb||cc||dd||f", "||", "a,bb,cc,dd,f");
    }

    // Regression, already passed before fix
    @Test
    public void testParseWithTwoCharDelimiter1() throws Exception {
        assertParsedValues("a~|b~|c~|d~|~|f", "~|", "a,b,c,d,,f");
    }

    // Regression, already passed before fix
    @Test
    public void testParseWithTwoCharDelimiter2() throws Exception {
        assertParsedValues("a~|b~|c~|d~|~|f~", "~|", "a,b,c,d,,f~");
    }

    // Regression, already passed before fix
    @Test
    public void testParseWithTwoCharDelimiter3() throws Exception {
        assertParsedValues("a~|b~|c~|d~|~|f|", "~|", "a,b,c,d,,f|");
    }

    // Regression, already passed before fix
    @Test
    public void testParseWithTwoCharDelimiter4() throws Exception {
        assertParsedValues("a~|b~|c~|d~|~|f~~||g", "~|", "a,b,c,d,,f~,|g");
    }

    // Regression, already passed before fix
    @Test
    public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception {
        assertParsedValues("a|b|c|d||f|", "|", "a,b,c,d,,f,");
    }
}