This commit is contained in:
Gary Gregory 2024-09-19 15:03:44 -04:00
commit decbfb3ea2
7 changed files with 91 additions and 85 deletions

View File

@ -27,26 +27,26 @@ jobs:
continue-on-error: ${{ matrix.experimental }}
strategy:
matrix:
java: [ 8, 11, 17, 21, 22 ]
java: [ 8, 11, 17, 21 ]
experimental: [false]
include:
- java: 23-ea
- java: 23
experimental: true
- java: 24-ea
experimental: true
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # 4.1.7
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # 4.1.7
with:
persist-credentials: false
- uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
- uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- name: Set up JDK ${{ matrix.java }}
uses: actions/setup-java@2dfa2011c5b2a0f1489bf9e433881c92c1631f88 # v4.3.0
uses: actions/setup-java@2dfa2011c5b2a0f1489bf9e433881c92c1631f88 # v4.3.0
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}

View File

@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.commons</groupId>
<artifactId>commons-parent</artifactId>
<version>74</version>
<version>75</version>
</parent>
<artifactId>commons-csv</artifactId>
<version>1.12.0-SNAPSHOT</version>
@ -50,7 +50,7 @@
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
<version>2.17.0</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
@ -173,7 +173,6 @@
<commons.jira.pid>12313222</commons.jira.pid>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<commons.javadoc.java.link>http://docs.oracle.com/javase/8/docs/api/</commons.javadoc.java.link>
<!-- Ensure copies work OK (can be removed later when this is in parent POM) -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>

View File

@ -51,12 +51,15 @@
<action type="fix" dev="ggregory" due-to="Dávid Szigecsán">Fix documentation for CSVFormat private constructor #466.</action>
<action type="fix" issue="CSV-294" dev="ggregory" due-to="Joern Huxhorn, Gary Gregory">CSVFormat does not support explicit " as escape char.</action>
<action type="fix" issue="CSV-150" dev="ggregory" due-to="dota17, Gary Gregory, Jörn Huxhorn">Escaping is not disableable.</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">Fix Javadoc warnings on Java 23.</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">Improve parser performance by up to 20%, YMMV.</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Dependabot">Bump commons-codec:commons-codec from 1.16.1 to 1.17.1 #422, #449.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.apache.commons:commons-parent from 69 to 74 #435, #452, #465, #468.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.apache.commons:commons-parent from 69 to 75 #435, #452, #465, #468, #475.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.codehaus.mojo:taglist-maven-plugin from 3.0.0 to 3.1.0 #441.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.apache.commons:commons-lang3 from 3.14.0 to 3.17.0 #450, #459, #470.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.hamcrest:hamcrest from 2.2 to 3.0 #455.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory, Dependabot">Bump commons-io:commons-io from 2.16.1 to 2.17.0 #476.</action>
</release>
<release version="1.11.0" date="2024-04-28" description="Feature and bug fix release (Java 8 or above)">
<!-- ADD -->

View File

@ -835,66 +835,90 @@ public final class CSVFormat implements Serializable {
public enum Predefined {
/**
* The DEFAULT predefined format.
*
* @see CSVFormat#DEFAULT
*/
Default(DEFAULT),
/**
* The EXCEL predefined format.
*
* @see CSVFormat#EXCEL
*/
Excel(EXCEL),
/**
* The INFORMIX_UNLOAD predefined format.
*
* @see CSVFormat#INFORMIX_UNLOAD
* @since 1.3
*/
InformixUnload(INFORMIX_UNLOAD),
/**
* The INFORMIX_UNLOAD_CSV predefined format.
*
* @see CSVFormat#INFORMIX_UNLOAD_CSV
* @since 1.3
*/
InformixUnloadCsv(INFORMIX_UNLOAD_CSV),
/**
* The MONGODB_CSV predefined format.
*
* @see CSVFormat#MONGODB_CSV
* @since 1.7
*/
MongoDBCsv(MONGODB_CSV),
/**
* The MONGODB_TSV predefined format.
*
* @see CSVFormat#MONGODB_TSV
* @since 1.7
*/
MongoDBTsv(MONGODB_TSV),
/**
* The MYSQL predefined format.
*
* @see CSVFormat#MYSQL
*/
MySQL(MYSQL),
/**
* The ORACLE predefined format.
*
* @see CSVFormat#ORACLE
*/
Oracle(ORACLE),
/**
* The POSTGRESQL_CSV predefined format.
*
* @see CSVFormat#POSTGRESQL_CSV
* @since 1.5
*/
PostgreSQLCsv(POSTGRESQL_CSV),
/**
* @see CSVFormat#POSTGRESQL_CSV
* The POSTGRESQL_TEXT predefined format.
*
* @see CSVFormat#POSTGRESQL_TEXT
*/
PostgreSQLText(POSTGRESQL_TEXT),
/**
* The RFC4180 predefined format.
*
* @see CSVFormat#RFC4180
*/
RFC4180(CSVFormat.RFC4180),
/**
* The TDF predefined format.
*
* @see CSVFormat#TDF
*/
TDF(CSVFormat.TDF);

View File

@ -22,11 +22,11 @@ import static org.apache.commons.csv.Constants.LF;
import static org.apache.commons.csv.Constants.UNDEFINED;
import static org.apache.commons.io.IOUtils.EOF;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.UnsynchronizedBufferedReader;
/**
* A special buffered reader which supports sophisticated read access.
@ -35,18 +35,19 @@ import org.apache.commons.io.IOUtils;
* {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
* </p>
*/
final class ExtendedBufferedReader extends BufferedReader {
final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
/** The last char returned */
private int lastChar = UNDEFINED;
private int lastCharMark = UNDEFINED;
/** The count of EOLs (CR/LF/CRLF) seen so far */
private long lineNumber;
private long lineNumberMark;
/** The position, which is the number of characters read so far */
private long position;
private boolean closed;
private long positionMark;
/**
* Constructs a new instance using the default buffer size.
@ -55,6 +56,22 @@ final class ExtendedBufferedReader extends BufferedReader {
super(reader);
}
/**
 * Marks the current position in the stream and snapshots the line number, last character, and position counters so that {@link #reset()} can restore
 * them together with the underlying reader.
 * <p>
 * The snapshot is taken only after the superclass has accepted the mark. If the superclass rejects the call by throwing, the previously recorded
 * snapshot is left untouched and therefore still matches whatever mark the superclass is holding.
 * </p>
 *
 * @param readAheadLimit the read-ahead limit passed through to the superclass {@code mark(int)}.
 * @throws IOException if the superclass fails to set the mark.
 */
@Override
public void mark(final int readAheadLimit) throws IOException {
    // Delegate first: on failure the saved counters must keep describing the mark that is still in effect.
    super.mark(readAheadLimit);
    lineNumberMark = lineNumber;
    lastCharMark = lastChar;
    positionMark = position;
}
/**
 * Resets the stream to the most recent mark and restores the line number, last character, and position counters captured by {@link #mark(int)}.
 * <p>
 * The counters are restored only after the superclass has successfully repositioned the stream. If the superclass throws, the stream has not moved,
 * so the counters are intentionally left describing the current (unchanged) position.
 * </p>
 *
 * @throws IOException if the superclass cannot reset the stream.
 */
@Override
public void reset() throws IOException {
    // Delegate first: a failed reset must not roll the counters back while the stream stays where it is.
    super.reset();
    lineNumber = lineNumberMark;
    lastChar = lastCharMark;
    position = positionMark;
}
/**
* Closes the stream.
*
@ -64,7 +81,6 @@ final class ExtendedBufferedReader extends BufferedReader {
@Override
public void close() throws IOException {
// Set ivars before calling super close() in case close() throws an IOException.
// Record the closed state first so it is visible even if the delegate call below fails.
closed = true;
// EOF as the last character makes the line-number getter treat its EOL counter as accurate (see its EOF check).
lastChar = EOF;
// Release the underlying reader last; any IOException propagates to the caller.
super.close();
}
@ -74,7 +90,7 @@ final class ExtendedBufferedReader extends BufferedReader {
*
* @return the current line number
*/
long getCurrentLineNumber() {
long getLineNumber() {
// Check if we are at EOL or EOF or just starting
if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == EOF) {
return lineNumber; // counter is accurate
@ -103,42 +119,6 @@ final class ExtendedBufferedReader extends BufferedReader {
return this.position;
}
/**
 * Tests whether {@link #close()} has been called on this reader.
 *
 * @return the value of the {@code closed} flag, which {@link #close()} sets to {@code true}.
 */
public boolean isClosed() {
    return this.closed;
}
/**
 * Looks at the next character of the underlying reader without consuming it, so that a following {@link #read()} returns the same value. Only
 * superclass operations are used, so the line number and the last character tracked by this reader are left untouched.
 *
 * @return the next character, as returned by the superclass {@code read()}
 *
 * @throws IOException
 *             If an I/O error occurs
 */
int peek() throws IOException {
    // Bracket a single superclass read with mark/reset so the character is handed back to the stream.
    super.mark(1);
    final int next = super.read();
    super.reset();
    return next;
}
/**
 * Fills the given buffer with up to {@code buf.length} upcoming characters from the underlying reader without consuming them, so that a following
 * {@link #read()} still returns the next value. Only superclass operations are used, so the line number and the last character tracked by this
 * reader are left untouched.
 *
 * @param buf the buffer to fill for the look ahead.
 * @return The number of characters peeked, or -1 if the end of the stream has been reached.
 * @throws IOException If an I/O error occurs
 */
int peek(final char[] buf) throws IOException {
    final int length = buf.length;
    // Mark far enough ahead to cover the whole buffer, read into it, then rewind to the mark.
    super.mark(length);
    final int count = super.read(buf, 0, length);
    super.reset();
    return count;
}
@Override
public int read() throws IOException {
final int current = super.read();

View File

@ -89,7 +89,7 @@ final class Lexer implements Closeable {
* @return the current line number
*/
long getCurrentLineNumber() {
return reader.getCurrentLineNumber();
return reader.getLineNumber();
}
String getFirstEol() {

View File

@ -61,32 +61,32 @@ public class ExtendedBufferedReaderTest {
final int EOLeolct = 9;
try (final ExtendedBufferedReader br = createBufferedReader(test)) {
assertEquals(0, br.getCurrentLineNumber());
assertEquals(0, br.getLineNumber());
int lineCount = 0;
while (br.readLine() != null) {
// consume all
lineCount++;
}
assertEquals(EOLeolct, br.getCurrentLineNumber());
assertEquals(lineCount, br.getCurrentLineNumber());
assertEquals(EOLeolct, br.getLineNumber());
assertEquals(lineCount, br.getLineNumber());
}
try (final ExtendedBufferedReader br = createBufferedReader(test)) {
assertEquals(0, br.getCurrentLineNumber());
assertEquals(0, br.getLineNumber());
int readCount = 0;
while (br.read() != EOF) {
// consume all
readCount++;
}
assertEquals(EOLeolct, br.getCurrentLineNumber());
assertEquals(EOLeolct, br.getLineNumber());
assertEquals(readCount, test.length());
}
try (final ExtendedBufferedReader br = createBufferedReader(test)) {
assertEquals(0, br.getCurrentLineNumber());
assertEquals(0, br.getLineNumber());
final char[] buff = new char[10];
while (br.read(buff, 0, 3) != EOF) {
// consume all
}
assertEquals(EOLeolct, br.getCurrentLineNumber());
assertEquals(EOLeolct, br.getLineNumber());
}
}
@ -96,7 +96,7 @@ public class ExtendedBufferedReaderTest {
try (ExtendedBufferedReader reader = createBufferedReader("1\r\n2\r\n")) {
reader.read(tmp1, 0, 2);
reader.read(tmp2, 2, 2);
assertEquals(2, reader.getCurrentLineNumber());
assertEquals(2, reader.getLineNumber());
}
}
@ -110,28 +110,28 @@ public class ExtendedBufferedReaderTest {
assertNull(br.readLine());
}
try (final ExtendedBufferedReader br = createBufferedReader("foo\n\nhello")) {
assertEquals(0, br.getCurrentLineNumber());
assertEquals(0, br.getLineNumber());
assertEquals("foo", br.readLine());
assertEquals(1, br.getCurrentLineNumber());
assertEquals(1, br.getLineNumber());
assertEquals("", br.readLine());
assertEquals(2, br.getCurrentLineNumber());
assertEquals(2, br.getLineNumber());
assertEquals("hello", br.readLine());
assertEquals(3, br.getCurrentLineNumber());
assertEquals(3, br.getLineNumber());
assertNull(br.readLine());
assertEquals(3, br.getCurrentLineNumber());
assertEquals(3, br.getLineNumber());
}
try (final ExtendedBufferedReader br = createBufferedReader("foo\n\nhello")) {
assertEquals('f', br.read());
assertEquals('o', br.peek());
assertEquals("oo", br.readLine());
assertEquals(1, br.getCurrentLineNumber());
assertEquals(1, br.getLineNumber());
assertEquals('\n', br.peek());
assertEquals("", br.readLine());
assertEquals(2, br.getCurrentLineNumber());
assertEquals(2, br.getLineNumber());
assertEquals('h', br.peek());
assertEquals("hello", br.readLine());
assertNull(br.readLine());
assertEquals(3, br.getCurrentLineNumber());
assertEquals(3, br.getLineNumber());
}
try (final ExtendedBufferedReader br = createBufferedReader("foo\rbaar\r\nfoo")) {
assertEquals("foo", br.readLine());
@ -146,50 +146,50 @@ public class ExtendedBufferedReaderTest {
@Test
public void testReadLookahead1() throws Exception {
try (final ExtendedBufferedReader br = createBufferedReader("1\n2\r3\n")) {
assertEquals(0, br.getCurrentLineNumber());
assertEquals(0, br.getLineNumber());
assertEquals('1', br.peek());
assertEquals(UNDEFINED, br.getLastChar());
assertEquals(0, br.getCurrentLineNumber());
assertEquals(0, br.getLineNumber());
assertEquals('1', br.read()); // Start line 1
assertEquals('1', br.getLastChar());
assertEquals(1, br.getCurrentLineNumber());
assertEquals(1, br.getLineNumber());
assertEquals('\n', br.peek());
assertEquals(1, br.getCurrentLineNumber());
assertEquals(1, br.getLineNumber());
assertEquals('1', br.getLastChar());
assertEquals('\n', br.read());
assertEquals(1, br.getCurrentLineNumber());
assertEquals(1, br.getLineNumber());
assertEquals('\n', br.getLastChar());
assertEquals(1, br.getCurrentLineNumber());
assertEquals(1, br.getLineNumber());
assertEquals('2', br.peek());
assertEquals(1, br.getCurrentLineNumber());
assertEquals(1, br.getLineNumber());
assertEquals('\n', br.getLastChar());
assertEquals(1, br.getCurrentLineNumber());
assertEquals(1, br.getLineNumber());
assertEquals('2', br.read()); // Start line 2
assertEquals(2, br.getCurrentLineNumber());
assertEquals(2, br.getLineNumber());
assertEquals('2', br.getLastChar());
assertEquals('\r', br.peek());
assertEquals(2, br.getCurrentLineNumber());
assertEquals(2, br.getLineNumber());
assertEquals('2', br.getLastChar());
assertEquals('\r', br.read());
assertEquals('\r', br.getLastChar());
assertEquals(2, br.getCurrentLineNumber());
assertEquals(2, br.getLineNumber());
assertEquals('3', br.peek());
assertEquals('\r', br.getLastChar());
assertEquals('3', br.read()); // Start line 3
assertEquals('3', br.getLastChar());
assertEquals(3, br.getCurrentLineNumber());
assertEquals(3, br.getLineNumber());
assertEquals('\n', br.peek());
assertEquals(3, br.getCurrentLineNumber());
assertEquals(3, br.getLineNumber());
assertEquals('3', br.getLastChar());
assertEquals('\n', br.read());
assertEquals(3, br.getCurrentLineNumber());
assertEquals(3, br.getLineNumber());
assertEquals('\n', br.getLastChar());
assertEquals(3, br.getCurrentLineNumber());
assertEquals(3, br.getLineNumber());
assertEquals(EOF, br.peek());
assertEquals('\n', br.getLastChar());
@ -197,7 +197,7 @@ public class ExtendedBufferedReaderTest {
assertEquals(EOF, br.getLastChar());
assertEquals(EOF, br.read());
assertEquals(EOF, br.peek());
assertEquals(3, br.getCurrentLineNumber());
assertEquals(3, br.getLineNumber());
}
}