mirror of
https://github.com/apache/commons-csv.git
synced 2025-02-17 07:26:32 +00:00
CSV-98 Line number counting is confusing
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1479936 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5b2e5f809b
commit
71c69df6dd
@ -202,7 +202,7 @@ final class CSVLexer extends Lexer {
|
||||
*/
|
||||
private Token parseEncapsulatedToken(final Token tkn) throws IOException {
|
||||
// save current line number in case needed for IOE
|
||||
final long startLineNumber = getLineNumber();
|
||||
final long startLineNumber = getCurrentLineNumber();
|
||||
int c;
|
||||
while (true) {
|
||||
c = in.read();
|
||||
@ -235,7 +235,7 @@ final class CSVLexer extends Lexer {
|
||||
return tkn;
|
||||
} else if (!isWhitespace(c)) {
|
||||
// error invalid char between token and next delimiter
|
||||
throw new IOException("(line " + getLineNumber() +
|
||||
throw new IOException("(line " + getCurrentLineNumber() +
|
||||
") invalid char between encapsulated token and delimiter");
|
||||
}
|
||||
}
|
||||
|
@ -158,8 +158,8 @@ public class CSVParser implements Iterable<CSVRecord> {
|
||||
*
|
||||
* @return current line number
|
||||
*/
|
||||
public long getLineNumber() {
|
||||
return lexer.getLineNumber();
|
||||
public long getCurrentLineNumber() {
|
||||
return lexer.getCurrentLineNumber();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -200,7 +200,7 @@ public class CSVParser implements Iterable<CSVRecord> {
|
||||
}
|
||||
break;
|
||||
case INVALID:
|
||||
throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
|
||||
throw new IOException("(line " + getCurrentLineNumber() + ") invalid parse sequence");
|
||||
case COMMENT: // Ignored currently
|
||||
if (sb == null) { // first comment for this record
|
||||
sb = new StringBuilder();
|
||||
|
@ -39,8 +39,8 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||
/** The last char returned */
|
||||
private int lastChar = UNDEFINED;
|
||||
|
||||
/** The line counter */
|
||||
private long lineCounter;
|
||||
/** The count of EOLs (CR/LF/CRLF) seen so far */
|
||||
private long eolCounter = 0;
|
||||
|
||||
/**
|
||||
* Created extended buffered reader using default buffer-size
|
||||
@ -53,7 +53,7 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||
public int read() throws IOException {
|
||||
final int current = super.read();
|
||||
if (current == CR || (current == LF && lastChar != CR)) {
|
||||
lineCounter++;
|
||||
eolCounter++;
|
||||
}
|
||||
lastChar = current;
|
||||
return lastChar;
|
||||
@ -85,10 +85,10 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||
final char ch = buf[i];
|
||||
if (ch == LF) {
|
||||
if (CR != (i > 0 ? buf[i - 1] : lastChar)) {
|
||||
lineCounter++;
|
||||
eolCounter++;
|
||||
}
|
||||
} else if (ch == CR) {
|
||||
lineCounter++;
|
||||
eolCounter++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -105,7 +105,7 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||
* Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
|
||||
* when processing a comment, otherwise information can be lost.
|
||||
* <p>
|
||||
* Increments {@link #lineCounter}
|
||||
* Increments {@link #eolCounter}
|
||||
* <p>
|
||||
* Sets {@link #lastChar} to {@link #END_OF_STREAM} at EOF, otherwise to LF
|
||||
*
|
||||
@ -117,7 +117,7 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||
|
||||
if (line != null) {
|
||||
lastChar = LF; // needed for detecting start of line
|
||||
lineCounter++;
|
||||
eolCounter++;
|
||||
} else {
|
||||
lastChar = END_OF_STREAM;
|
||||
}
|
||||
@ -127,7 +127,7 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||
|
||||
/**
|
||||
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
|
||||
* still return this value.
|
||||
* still return this value. Does not affect line number or last character.
|
||||
*
|
||||
* @return the next character
|
||||
*
|
||||
@ -143,11 +143,15 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of lines read
|
||||
* Returns the current line number
|
||||
*
|
||||
* @return the number of EOLs seen so far
|
||||
* @return the current line number
|
||||
*/
|
||||
long getLineNumber() {
|
||||
return lineCounter;
|
||||
long getCurrentLineNumber() {
|
||||
// Check if we are at EOL or EOF or just starting
|
||||
if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
|
||||
return eolCounter; // counter is accurate
|
||||
}
|
||||
return eolCounter + 1; // Allow for counter being incremented only at EOL
|
||||
}
|
||||
}
|
||||
|
@ -70,12 +70,12 @@ abstract class Lexer {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of lines read
|
||||
* Returns the current line number
|
||||
*
|
||||
* @return the number of EOLs seen so far
|
||||
* @return the current line number
|
||||
*/
|
||||
long getLineNumber() {
|
||||
return in.getLineNumber();
|
||||
long getCurrentLineNumber() {
|
||||
return in.getCurrentLineNumber();
|
||||
}
|
||||
|
||||
// TODO escape handling needs more work
|
||||
|
@ -198,7 +198,7 @@ class CSVLexer1 extends Lexer {
|
||||
*/
|
||||
private Token encapsulatedTokenLexer(final Token tkn, int c) throws IOException {
|
||||
// save current line
|
||||
final long startLineNumber = getLineNumber();
|
||||
final long startLineNumber = getCurrentLineNumber();
|
||||
// ignore the given delimiter
|
||||
// assert c == delimiter;
|
||||
while (true) {
|
||||
@ -230,7 +230,7 @@ class CSVLexer1 extends Lexer {
|
||||
return tkn;
|
||||
} else if (!isWhitespace(c)) {
|
||||
// error invalid char between token and next delimiter
|
||||
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -187,7 +187,7 @@ class CSVLexer1306663 extends Lexer {
|
||||
*/
|
||||
private Token encapsulatedTokenLexer(final Token tkn) throws IOException {
|
||||
// save current line
|
||||
final long startLineNumber = getLineNumber();
|
||||
final long startLineNumber = getCurrentLineNumber();
|
||||
// ignore the given delimiter
|
||||
// assert c == delimiter;
|
||||
int c;
|
||||
@ -218,7 +218,7 @@ class CSVLexer1306663 extends Lexer {
|
||||
return tkn;
|
||||
} else if (!isWhitespace(c)) {
|
||||
// error invalid char between token and next delimiter
|
||||
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -187,7 +187,7 @@ class CSVLexer1306667 extends Lexer {
|
||||
*/
|
||||
private Token encapsulatedTokenLexer(final Token tkn) throws IOException {
|
||||
// save current line
|
||||
final long startLineNumber = getLineNumber();
|
||||
final long startLineNumber = getCurrentLineNumber();
|
||||
// ignore the given delimiter
|
||||
// assert c == delimiter;
|
||||
int c;
|
||||
@ -218,7 +218,7 @@ class CSVLexer1306667 extends Lexer {
|
||||
return tkn;
|
||||
} else if (!isWhitespace(c)) {
|
||||
// error invalid char between token and next delimiter
|
||||
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -170,7 +170,7 @@ class CSVLexer3 extends Lexer {
|
||||
state = State.ESCAPE_QUOTE;
|
||||
break;
|
||||
case EOFCHAR:
|
||||
throw new IOException("(line " + getLineNumber() + ") unexpected EOF in quoted string");
|
||||
throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in quoted string");
|
||||
default:
|
||||
tkn.content.append((char) intch);
|
||||
break;
|
||||
@ -194,7 +194,7 @@ class CSVLexer3 extends Lexer {
|
||||
case WHITESPACE: // trailing whitespace may be allowed
|
||||
if (!ignoreSurroundingSpaces) {
|
||||
// error invalid char between token and next delimiter
|
||||
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
}
|
||||
break;
|
||||
// Everything else is invalid
|
||||
@ -202,7 +202,7 @@ class CSVLexer3 extends Lexer {
|
||||
case OTHER:
|
||||
case COMMENT_START:
|
||||
// error invalid char between token and next delimiter
|
||||
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
}
|
||||
break;
|
||||
case ESCAPE_PLAIN:
|
||||
@ -221,7 +221,7 @@ class CSVLexer3 extends Lexer {
|
||||
tkn.content.append((char) intch);
|
||||
break;
|
||||
case EOFCHAR:
|
||||
throw new IOException("(line " + getLineNumber() + ") unexpected EOF in escape sequence");
|
||||
throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in escape sequence");
|
||||
}
|
||||
break;
|
||||
case ESCAPE_QUOTE:
|
||||
@ -239,7 +239,7 @@ class CSVLexer3 extends Lexer {
|
||||
tkn.content.append((char) intch);
|
||||
break;
|
||||
case EOFCHAR:
|
||||
throw new IOException("(line " + getLineNumber() + ") unexpected EOF in escape sequence");
|
||||
throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in escape sequence");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -629,21 +629,21 @@ public class CSVParserTest {
|
||||
CSVFormat.newBuilder().withRecordSeparator(CRLF).build());
|
||||
CSVRecord record;
|
||||
assertEquals(0, parser.getRecordNumber());
|
||||
assertEquals(0, parser.getLineNumber());
|
||||
assertEquals(0, parser.getCurrentLineNumber());
|
||||
assertNotNull(record = parser.nextRecord());
|
||||
assertEquals(3, parser.getLineNumber());
|
||||
assertEquals(3, parser.getCurrentLineNumber());
|
||||
assertEquals(1, record.getRecordNumber());
|
||||
assertEquals(1, parser.getRecordNumber());
|
||||
assertNotNull(record = parser.nextRecord());
|
||||
assertEquals(6, parser.getLineNumber());
|
||||
assertEquals(6, parser.getCurrentLineNumber());
|
||||
assertEquals(2, record.getRecordNumber());
|
||||
assertEquals(2, parser.getRecordNumber());
|
||||
assertNotNull(record = parser.nextRecord());
|
||||
assertEquals(8, parser.getLineNumber());
|
||||
assertEquals(8, parser.getCurrentLineNumber());
|
||||
assertEquals(3, record.getRecordNumber());
|
||||
assertEquals(3, parser.getRecordNumber());
|
||||
assertNull(record = parser.nextRecord());
|
||||
assertEquals(8, parser.getLineNumber());
|
||||
assertEquals(8, parser.getCurrentLineNumber());
|
||||
assertEquals(3, parser.getRecordNumber());
|
||||
}
|
||||
|
||||
@ -676,17 +676,17 @@ public class CSVParserTest {
|
||||
|
||||
private void validateLineNumbers(final String lineSeparator) throws IOException {
|
||||
final CSVParser parser = new CSVParser("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.newBuilder().withRecordSeparator(lineSeparator).build());
|
||||
assertEquals(0, parser.getLineNumber());
|
||||
assertEquals(0, parser.getCurrentLineNumber());
|
||||
assertNotNull(parser.nextRecord());
|
||||
assertEquals(1, parser.getLineNumber());
|
||||
assertEquals(1, parser.getCurrentLineNumber());
|
||||
assertNotNull(parser.nextRecord());
|
||||
assertEquals(2, parser.getLineNumber());
|
||||
assertEquals(2, parser.getCurrentLineNumber());
|
||||
assertNotNull(parser.nextRecord());
|
||||
// Still 2 because the last line is does not have EOL chars
|
||||
assertEquals(2, parser.getLineNumber());
|
||||
assertEquals(2, parser.getCurrentLineNumber());
|
||||
assertNull(parser.nextRecord());
|
||||
// Still 2 because the last line is does not have EOL chars
|
||||
assertEquals(2, parser.getLineNumber());
|
||||
assertEquals(2, parser.getCurrentLineNumber());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -47,44 +47,50 @@ public class ExtendedBufferedReaderTest {
|
||||
@Test
|
||||
public void testReadLookahead1() throws Exception {
|
||||
final ExtendedBufferedReader br = getBufferedReader("1\n2\r3\n");
|
||||
assertEquals(0, br.getCurrentLineNumber());
|
||||
assertEquals('1', br.lookAhead());
|
||||
assertEquals(UNDEFINED, br.getLastChar());
|
||||
assertEquals('1', br.read());
|
||||
assertEquals(0, br.getCurrentLineNumber());
|
||||
assertEquals('1', br.read()); // Start line 1
|
||||
assertEquals('1', br.getLastChar());
|
||||
|
||||
assertEquals(0, br.getLineNumber());
|
||||
assertEquals(1, br.getCurrentLineNumber());
|
||||
assertEquals('\n', br.lookAhead());
|
||||
assertEquals(0, br.getLineNumber());
|
||||
assertEquals(1, br.getCurrentLineNumber());
|
||||
assertEquals('1', br.getLastChar());
|
||||
assertEquals('\n', br.read());
|
||||
assertEquals(1, br.getLineNumber());
|
||||
assertEquals(1, br.getCurrentLineNumber());
|
||||
assertEquals('\n', br.getLastChar());
|
||||
assertEquals(1, br.getLineNumber());
|
||||
assertEquals(1, br.getCurrentLineNumber());
|
||||
|
||||
assertEquals('2', br.lookAhead());
|
||||
assertEquals(1, br.getLineNumber());
|
||||
assertEquals(1, br.getCurrentLineNumber());
|
||||
assertEquals('\n', br.getLastChar());
|
||||
assertEquals(1, br.getLineNumber());
|
||||
assertEquals('2', br.read());
|
||||
assertEquals(1, br.getCurrentLineNumber());
|
||||
assertEquals('2', br.read()); // Start line 2
|
||||
assertEquals(2, br.getCurrentLineNumber());
|
||||
assertEquals('2', br.getLastChar());
|
||||
|
||||
assertEquals('\r', br.lookAhead());
|
||||
assertEquals(2, br.getCurrentLineNumber());
|
||||
assertEquals('2', br.getLastChar());
|
||||
assertEquals('\r', br.read());
|
||||
assertEquals('\r', br.getLastChar());
|
||||
assertEquals(2, br.getCurrentLineNumber());
|
||||
|
||||
assertEquals('3', br.lookAhead());
|
||||
assertEquals('\r', br.getLastChar());
|
||||
assertEquals('3', br.read());
|
||||
assertEquals('3', br.read()); // Start line 3
|
||||
assertEquals('3', br.getLastChar());
|
||||
assertEquals(3, br.getCurrentLineNumber());
|
||||
|
||||
assertEquals('\n', br.lookAhead());
|
||||
assertEquals(2, br.getLineNumber());
|
||||
assertEquals(3, br.getCurrentLineNumber());
|
||||
assertEquals('3', br.getLastChar());
|
||||
assertEquals('\n', br.read());
|
||||
assertEquals(3, br.getLineNumber());
|
||||
assertEquals(3, br.getCurrentLineNumber());
|
||||
assertEquals('\n', br.getLastChar());
|
||||
assertEquals(3, br.getLineNumber());
|
||||
assertEquals(3, br.getCurrentLineNumber());
|
||||
|
||||
assertEquals(END_OF_STREAM, br.lookAhead());
|
||||
assertEquals('\n', br.getLastChar());
|
||||
@ -92,6 +98,7 @@ public class ExtendedBufferedReaderTest {
|
||||
assertEquals(END_OF_STREAM, br.getLastChar());
|
||||
assertEquals(END_OF_STREAM, br.read());
|
||||
assertEquals(END_OF_STREAM, br.lookAhead());
|
||||
assertEquals(3, br.getCurrentLineNumber());
|
||||
|
||||
}
|
||||
|
||||
@ -125,28 +132,28 @@ public class ExtendedBufferedReaderTest {
|
||||
assertNull(br.readLine());
|
||||
|
||||
br = getBufferedReader("foo\n\nhello");
|
||||
assertEquals(0, br.getLineNumber());
|
||||
assertEquals(0, br.getCurrentLineNumber());
|
||||
assertEquals("foo",br.readLine());
|
||||
assertEquals(1, br.getLineNumber());
|
||||
assertEquals(1, br.getCurrentLineNumber());
|
||||
assertEquals("",br.readLine());
|
||||
assertEquals(2, br.getLineNumber());
|
||||
assertEquals(2, br.getCurrentLineNumber());
|
||||
assertEquals("hello",br.readLine());
|
||||
assertEquals(3, br.getLineNumber());
|
||||
assertEquals(3, br.getCurrentLineNumber());
|
||||
assertNull(br.readLine());
|
||||
assertEquals(3, br.getLineNumber());
|
||||
assertEquals(3, br.getCurrentLineNumber());
|
||||
|
||||
br = getBufferedReader("foo\n\nhello");
|
||||
assertEquals('f', br.read());
|
||||
assertEquals('o', br.lookAhead());
|
||||
assertEquals("oo",br.readLine());
|
||||
assertEquals(1, br.getLineNumber());
|
||||
assertEquals(1, br.getCurrentLineNumber());
|
||||
assertEquals('\n', br.lookAhead());
|
||||
assertEquals("",br.readLine());
|
||||
assertEquals(2, br.getLineNumber());
|
||||
assertEquals(2, br.getCurrentLineNumber());
|
||||
assertEquals('h', br.lookAhead());
|
||||
assertEquals("hello",br.readLine());
|
||||
assertNull(br.readLine());
|
||||
assertEquals(3, br.getLineNumber());
|
||||
assertEquals(3, br.getCurrentLineNumber());
|
||||
|
||||
|
||||
br = getBufferedReader("foo\rbaar\r\nfoo");
|
||||
@ -171,20 +178,20 @@ public class ExtendedBufferedReaderTest {
|
||||
ExtendedBufferedReader br;
|
||||
|
||||
br = getBufferedReader(test);
|
||||
assertEquals(0, br.getLineNumber());
|
||||
assertEquals(0, br.getCurrentLineNumber());
|
||||
while(br.readLine()!=null) {}
|
||||
assertEquals(EOLeolct, br.getLineNumber());
|
||||
assertEquals(EOLeolct, br.getCurrentLineNumber());
|
||||
|
||||
br = getBufferedReader(test);
|
||||
assertEquals(0, br.getLineNumber());
|
||||
assertEquals(0, br.getCurrentLineNumber());
|
||||
while(br.read()!=-1) {}
|
||||
assertEquals(EOLeolct, br.getLineNumber());
|
||||
assertEquals(EOLeolct, br.getCurrentLineNumber());
|
||||
|
||||
br = getBufferedReader(test);
|
||||
assertEquals(0, br.getLineNumber());
|
||||
assertEquals(0, br.getCurrentLineNumber());
|
||||
final char[] buff = new char[10];
|
||||
while(br.read(buff ,0, 3)!=-1) {}
|
||||
assertEquals(EOLeolct, br.getLineNumber());
|
||||
assertEquals(EOLeolct, br.getCurrentLineNumber());
|
||||
}
|
||||
|
||||
private ExtendedBufferedReader getBufferedReader(final String s) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user