[CSV-265] Update buffer position when reading line comment (#120)

* [CSV-265] Add JiraCsv265Test

* [CSV-265] Update buffer position when reading line comment

* Update JiraCsv265Test.java

File should end in a new line.

Co-authored-by: Tyler King <tylerking001@hotmail.com>
Co-authored-by: Gary Gregory <garydgregory@users.noreply.github.com>
This commit is contained in:
king-tyler 2021-07-21 09:37:53 -05:00 committed by GitHub
parent 399204ce58
commit a4c6037d0c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 109 additions and 13 deletions

View File

@ -198,29 +198,37 @@ final class ExtendedBufferedReader extends BufferedReader {
}
/**
* Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
* when processing a comment, otherwise information can be lost.
* Gets the next line, dropping the line terminator(s). This method should only be called when processing a
* comment, otherwise information can be lost.
* <p>
* Increments {@link #eolCounter}.
* Increments {@link #eolCounter} and updates {@link #position}.
* </p>
* <p>
* Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF.
* Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise the last EOL character.
* </p>
*
* @return the line that was read, or null if reached EOF.
*/
@Override
public String readLine() throws IOException {
final String line = super.readLine();
if (line != null) {
lastChar = LF; // needed for detecting start of line
eolCounter++;
} else {
lastChar = END_OF_STREAM;
if (lookAhead() == END_OF_STREAM) {
return null;
}
return line;
final StringBuilder buffer = new StringBuilder();
while (true) {
final int current = read();
if (current == CR) {
final int next = lookAhead();
if (next == LF) {
read();
}
}
if (current == END_OF_STREAM || current == LF || current == CR) {
break;
}
buffer.append((char) current);
}
return buffer.toString();
}
}

View File

@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv.issues;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.junit.jupiter.api.Test;
/**
 * Regression tests for [CSV-265]: {@link CSVRecord#getCharacterPosition()} must report the correct offset for
 * records that are preceded by comment lines.
 */
public class JiraCsv265Test {

    /**
     * Creates the format used by every test in this class: {@link CSVFormat#DEFAULT} with {@code '#'} as the comment
     * marker, {@code setHeader()} called without arguments, and header-record skipping enabled.
     *
     * @return a freshly built format.
     */
    private static CSVFormat newCommentAwareFormat() {
        // @formatter:off
        return CSVFormat.DEFAULT.builder()
            .setCommentMarker('#')
            .setHeader()
            .setSkipHeaderRecord(true)
            .build();
        // @formatter:on
    }

    /**
     * Parses {@code csv} and checks, record by record, that the reported character position equals the index of the
     * matching anchor text within {@code csv}.
     *
     * @param csv the CSV document to parse.
     * @param anchors one substring of {@code csv} per record to read, in record order; each record's expected
     *        position is {@code csv.indexOf(anchor)}.
     * @throws IOException if creating or closing the parser fails.
     */
    private static void assertRecordPositions(final String csv, final String... anchors) throws IOException {
        try (final CSVParser parser = newCommentAwareFormat().parse(new StringReader(csv))) {
            final Iterator<CSVRecord> records = parser.iterator();
            for (final String anchor : anchors) {
                // Pull the record first, then compare, so records are consumed strictly in order.
                final CSVRecord row = records.next();
                assertEquals(csv.indexOf(anchor), row.getCharacterPosition());
            }
        }
    }

    /**
     * Single comment lines alternate with records; each data record is expected to be positioned at the start of the
     * comment line directly above it.
     */
    @Test
    public void testCharacterPositionWithComments() throws IOException {
        // @formatter:off
        final String csv = "# Comment1\n"
            + "Header1,Header2\n"
            + "# Comment2\n"
            + "Value1,Value2\n"
            + "# Comment3\n"
            + "Value3,Value4\n"
            + "# Comment4\n";
        // @formatter:on
        assertRecordPositions(csv, "# Comment2", "# Comment3");
    }

    /**
     * Runs of two comment lines precede each record; each data record is expected to be positioned at the start of
     * the first comment line of the run above it.
     */
    @Test
    public void testCharacterPositionWithCommentsSpanningMultipleLines() throws IOException {
        // @formatter:off
        final String csv = "# Comment1\n"
            + "# Comment2\n"
            + "Header1,Header2\n"
            + "# Comment3\n"
            + "# Comment4\n"
            + "Value1,Value2\n"
            + "# Comment5\n"
            + "# Comment6\n"
            + "Value3,Value4";
        // @formatter:on
        assertRecordPositions(csv, "# Comment3", "# Comment5");
    }
}