From a4c6037d0ccfc5b7176d6c522fcc0ab38b9bdd28 Mon Sep 17 00:00:00 2001 From: king-tyler <8304181+king-tyler@users.noreply.github.com> Date: Wed, 21 Jul 2021 09:37:53 -0500 Subject: [PATCH] [CSV-265] Update buffer position when reading line comment (#120) * [CSV-265] Add JiraCsv265Test * [CSV-265] Update buffer position when reading line comment * Update JiraCsv265Test.java File should end in a new line. Co-authored-by: Tyler King Co-authored-by: Gary Gregory --- .../commons/csv/ExtendedBufferedReader.java | 34 ++++--- .../commons/csv/issues/JiraCsv265Test.java | 88 +++++++++++++++++++ 2 files changed, 109 insertions(+), 13 deletions(-) create mode 100644 src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java index 4b3fb483..1067dd26 100644 --- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java @@ -198,29 +198,37 @@ final class ExtendedBufferedReader extends BufferedReader { } /** - * Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called - * when processing a comment, otherwise information can be lost. + * Gets the next line, dropping the line terminator(s). This method should only be called when processing a + * comment, otherwise information can be lost. * <p>

- * Increments {@link #eolCounter}. + * Increments {@link #eolCounter} and updates {@link #position}. * </p>

* <p>

- * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF. + * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise the last EOL character. * </p>

* * @return the line that was read, or null if reached EOF. */ @Override public String readLine() throws IOException { - final String line = super.readLine(); - - if (line != null) { - lastChar = LF; // needed for detecting start of line - eolCounter++; - } else { - lastChar = END_OF_STREAM; + if (lookAhead() == END_OF_STREAM) { + return null; } - - return line; + final StringBuilder buffer = new StringBuilder(); + while (true) { + final int current = read(); + if (current == CR) { + final int next = lookAhead(); + if (next == LF) { + read(); + } + } + if (current == END_OF_STREAM || current == LF || current == CR) { + break; + } + buffer.append((char) current); + } + return buffer.toString(); } } diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java new file mode 100644 index 00000000..f62b8665 --- /dev/null +++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv265Test.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.commons.csv.issues; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.io.StringReader; +import java.util.Iterator; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import org.junit.jupiter.api.Test; + +/** + * Tests [CSV-265] {@link CSVRecord#getCharacterPosition()} returns the correct position after encountering a comment. + */ +public class JiraCsv265Test { + + @Test + public void testCharacterPositionWithComments() throws IOException { + // @formatter:off + final String csv = "# Comment1\n" + + "Header1,Header2\n" + + "# Comment2\n" + + "Value1,Value2\n" + + "# Comment3\n" + + "Value3,Value4\n" + + "# Comment4\n"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.builder() + .setCommentMarker('#') + .setHeader() + .setSkipHeaderRecord(true) + .build(); + // @formatter:on + try (final CSVParser parser = csvFormat.parse(new StringReader(csv))) { + final Iterator<CSVRecord> itr = parser.iterator(); + final CSVRecord record1 = itr.next(); + assertEquals(csv.indexOf("# Comment2"), record1.getCharacterPosition()); + final CSVRecord record2 = itr.next(); + assertEquals(csv.indexOf("# Comment3"), record2.getCharacterPosition()); + } + } + + @Test + public void testCharacterPositionWithCommentsSpanningMultipleLines() throws IOException { + // @formatter:off + final String csv = "# Comment1\n" + + "# Comment2\n" + + "Header1,Header2\n" + + "# Comment3\n" + + "# Comment4\n" + + "Value1,Value2\n" + + "# Comment5\n" + + "# Comment6\n" + + "Value3,Value4"; + final CSVFormat csvFormat = CSVFormat.DEFAULT.builder() + .setCommentMarker('#') + .setHeader() + .setSkipHeaderRecord(true) + .build(); + // @formatter:on + try (final CSVParser parser = csvFormat.parse(new StringReader(csv))) { + final Iterator<CSVRecord> itr = parser.iterator(); + final CSVRecord record1 = itr.next(); + assertEquals(csv.indexOf("# Comment3"), 
record1.getCharacterPosition()); + final CSVRecord record2 = itr.next(); + assertEquals(csv.indexOf("# Comment5"), record2.getCharacterPosition()); + } + } + +}