CSV-288 Fix for multi-char delimiter not working as expected (#218)
When checking if previous token is delimiter, isDelimiter(lastChar) unintentionally advance the buffer pointer. Also isDelimiter(lastChar) cannot handle multi-char delimiter. To fix this, create a new indicator isLastTokenDelimiter instead of using isDelimiter(lastChar), the indicator is set/reset in isDelimiter()
This commit is contained in:
parent
94711ebf25
commit
c15a06eee9
|
@ -62,6 +62,8 @@ final class Lexer implements Closeable {
|
||||||
private final ExtendedBufferedReader reader;
|
private final ExtendedBufferedReader reader;
|
||||||
private String firstEol;
|
private String firstEol;
|
||||||
|
|
||||||
|
private boolean isLastTokenDelimiter;
|
||||||
|
|
||||||
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
|
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
this.delimiter = format.getDelimiterString().toCharArray();
|
this.delimiter = format.getDelimiterString().toCharArray();
|
||||||
|
@ -124,11 +126,13 @@ final class Lexer implements Closeable {
|
||||||
* @throws IOException If an I/O error occurs.
|
* @throws IOException If an I/O error occurs.
|
||||||
*/
|
*/
|
||||||
boolean isDelimiter(final int ch) throws IOException {
|
boolean isDelimiter(final int ch) throws IOException {
|
||||||
|
isLastTokenDelimiter = false;
|
||||||
if (ch != delimiter[0]) {
|
if (ch != delimiter[0]) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (delimiter.length == 1) {
|
if (delimiter.length == 1) {
|
||||||
return true;
|
isLastTokenDelimiter = true;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
reader.lookAhead(delimiterBuf);
|
reader.lookAhead(delimiterBuf);
|
||||||
for (int i = 0; i < delimiterBuf.length; i++) {
|
for (int i = 0; i < delimiterBuf.length; i++) {
|
||||||
|
@ -137,7 +141,8 @@ final class Lexer implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
|
final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
|
||||||
return count != END_OF_STREAM;
|
isLastTokenDelimiter = count != END_OF_STREAM;
|
||||||
|
return isLastTokenDelimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -243,7 +248,7 @@ final class Lexer implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// did we reach eof during the last iteration already ? EOF
|
// did we reach eof during the last iteration already ? EOF
|
||||||
if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) {
|
if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
|
||||||
token.type = EOF;
|
token.type = EOF;
|
||||||
// don't set token.isReady here because no content
|
// don't set token.isReady here because no content
|
||||||
return token;
|
return token;
|
||||||
|
|
|
@ -0,0 +1,230 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.commons.csv.issues;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
import org.apache.commons.csv.CSVFormat;
|
||||||
|
import org.apache.commons.csv.CSVParser;
|
||||||
|
import org.apache.commons.csv.CSVPrinter;
|
||||||
|
import org.apache.commons.csv.CSVRecord;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
public class JiraCsv288Test {
|
||||||
|
@Test
|
||||||
|
// Before fix:
|
||||||
|
// expected: <a,b,c,d,,f> but was: <a,b|c,d,|f>
|
||||||
|
public void testParseWithDoublePipeDelimiter() throws Exception {
|
||||||
|
final Reader in = new StringReader("a||b||c||d||||f");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b,c,d,,f", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Before fix:
|
||||||
|
// expected: <a,b,c,d,,f> but was: <a,b|c,d,|f>
|
||||||
|
public void testParseWithTriplePipeDelimiter() throws Exception {
|
||||||
|
final Reader in = new StringReader("a|||b|||c|||d||||||f");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|||").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b,c,d,,f", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Before fix:
|
||||||
|
// expected: <a,b,c,d,,f> but was: <a,b,c,d,|f>
|
||||||
|
public void testParseWithABADelimiter() throws Exception {
|
||||||
|
final Reader in = new StringReader("a|~|b|~|c|~|d|~||~|f");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|~|").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b,c,d,,f", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Before fix:
|
||||||
|
// expected: <a,b||c,d,,f> but was: <a,b||c,d,|f>
|
||||||
|
public void testParseWithDoublePipeDelimiterQuoted() throws Exception {
|
||||||
|
final Reader in = new StringReader("a||\"b||c\"||d||||f");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b||c,d,,f", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Before fix:
|
||||||
|
// expected: <a,b,c,d,,f,> but was: <a,b|c,d,|f>
|
||||||
|
public void testParseWithDoublePipeDelimiterEndsWithDelimiter() throws Exception {
|
||||||
|
final Reader in = new StringReader("a||b||c||d||||f||");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b,c,d,,f,", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Before fix:
|
||||||
|
// expected: <a,b,c,d,,f,> but was: <a,b,c,d,,f>
|
||||||
|
public void testParseWithTwoCharDelimiterEndsWithDelimiter() throws Exception {
|
||||||
|
final Reader in = new StringReader("a~|b~|c~|d~|~|f~|");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b,c,d,,f,", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Regression, already passed before fix
|
||||||
|
|
||||||
|
public void testParseWithDoublePipeDelimiterDoubleCharValue() throws Exception {
|
||||||
|
final Reader in = new StringReader("a||bb||cc||dd||f");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("||").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,bb,cc,dd,f", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Regression, already passed before fix
|
||||||
|
public void testParseWithTwoCharDelimiter1() throws Exception {
|
||||||
|
final Reader in = new StringReader("a~|b~|c~|d~|~|f");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b,c,d,,f", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Regression, already passed before fix
|
||||||
|
public void testParseWithTwoCharDelimiter2() throws Exception {
|
||||||
|
final Reader in = new StringReader("a~|b~|c~|d~|~|f~");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b,c,d,,f~", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Regression, already passed before fix
|
||||||
|
public void testParseWithTwoCharDelimiter3() throws Exception {
|
||||||
|
final Reader in = new StringReader("a~|b~|c~|d~|~|f|");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b,c,d,,f|", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Regression, already passed before fix
|
||||||
|
public void testParseWithTwoCharDelimiter4() throws Exception {
|
||||||
|
final Reader in = new StringReader("a~|b~|c~|d~|~|f~~||g");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("~|").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b,c,d,,f~,|g", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
// Regression, already passed before fix
|
||||||
|
public void testParseWithSinglePipeDelimiterEndsWithDelimiter() throws Exception {
|
||||||
|
final Reader in = new StringReader("a|b|c|d||f|");
|
||||||
|
StringBuilder stringBuilder = new StringBuilder();
|
||||||
|
try (CSVPrinter csvPrinter = new CSVPrinter(stringBuilder, CSVFormat.EXCEL);
|
||||||
|
CSVParser csvParser = CSVParser.parse(in, CSVFormat.Builder.create().setDelimiter("|").build())) {
|
||||||
|
for (CSVRecord csvRecord : csvParser) {
|
||||||
|
for (int i = 0; i < csvRecord.size(); i++) {
|
||||||
|
csvPrinter.print(csvRecord.get(i));
|
||||||
|
}
|
||||||
|
assertEquals("a,b,c,d,,f,", stringBuilder.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue