CSV-98 Line number counting is confusing

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1479936 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sebastian Bazley 2013-05-07 15:12:48 +00:00
parent 5b2e5f809b
commit 71c69df6dd
10 changed files with 79 additions and 68 deletions

View File

@ -202,7 +202,7 @@ final class CSVLexer extends Lexer {
*/
private Token parseEncapsulatedToken(final Token tkn) throws IOException {
// save current line number in case needed for IOE
final long startLineNumber = getLineNumber();
final long startLineNumber = getCurrentLineNumber();
int c;
while (true) {
c = in.read();
@ -235,7 +235,7 @@ final class CSVLexer extends Lexer {
return tkn;
} else if (!isWhitespace(c)) {
// error invalid char between token and next delimiter
throw new IOException("(line " + getLineNumber() +
throw new IOException("(line " + getCurrentLineNumber() +
") invalid char between encapsulated token and delimiter");
}
}

View File

@ -158,8 +158,8 @@ public class CSVParser implements Iterable<CSVRecord> {
*
* @return current line number
*/
public long getLineNumber() {
return lexer.getLineNumber();
public long getCurrentLineNumber() {
return lexer.getCurrentLineNumber();
}
/**
@ -200,7 +200,7 @@ public class CSVParser implements Iterable<CSVRecord> {
}
break;
case INVALID:
throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
throw new IOException("(line " + getCurrentLineNumber() + ") invalid parse sequence");
case COMMENT: // Ignored currently
if (sb == null) { // first comment for this record
sb = new StringBuilder();

View File

@ -39,8 +39,8 @@ final class ExtendedBufferedReader extends BufferedReader {
/** The last char returned */
private int lastChar = UNDEFINED;
/** The line counter */
private long lineCounter;
/** The count of EOLs (CR/LF/CRLF) seen so far */
private long eolCounter = 0;
/**
* Creates an extended buffered reader using the default buffer size
@ -53,7 +53,7 @@ final class ExtendedBufferedReader extends BufferedReader {
public int read() throws IOException {
final int current = super.read();
if (current == CR || (current == LF && lastChar != CR)) {
lineCounter++;
eolCounter++;
}
lastChar = current;
return lastChar;
@ -85,10 +85,10 @@ final class ExtendedBufferedReader extends BufferedReader {
final char ch = buf[i];
if (ch == LF) {
if (CR != (i > 0 ? buf[i - 1] : lastChar)) {
lineCounter++;
eolCounter++;
}
} else if (ch == CR) {
lineCounter++;
eolCounter++;
}
}
@ -105,7 +105,7 @@ final class ExtendedBufferedReader extends BufferedReader {
* Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
* when processing a comment, otherwise information can be lost.
* <p>
* Increments {@link #lineCounter}
* Increments {@link #eolCounter}
* <p>
* Sets {@link #lastChar} to {@link #END_OF_STREAM} at EOF, otherwise to LF
*
@ -117,7 +117,7 @@ final class ExtendedBufferedReader extends BufferedReader {
if (line != null) {
lastChar = LF; // needed for detecting start of line
lineCounter++;
eolCounter++;
} else {
lastChar = END_OF_STREAM;
}
@ -127,7 +127,7 @@ final class ExtendedBufferedReader extends BufferedReader {
/**
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
* still return this value.
* still return this value. Does not affect line number or last character.
*
* @return the next character
*
@ -143,11 +143,15 @@ final class ExtendedBufferedReader extends BufferedReader {
}
/**
* Returns the number of lines read
* Returns the current line number
*
* @return the number of EOLs seen so far
* @return the current line number
*/
long getLineNumber() {
return lineCounter;
long getCurrentLineNumber() {
// eolCounter is only advanced when an EOL (CR, LF or CRLF) is consumed, so while the
// reader is part-way through a line the counter is one behind the line being read.
// Check if we are at EOL or EOF or just starting
if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
// Nothing has been read yet (UNDEFINED), an EOL char was the last one consumed (CR/LF),
// or the end of the stream was reached: no partially-read line needs to be added.
return eolCounter; // counter is accurate
}
// The last char consumed was ordinary content, so the line containing it has been started
// but its EOL has not been counted yet.
return eolCounter + 1; // Allow for counter being incremented only at EOL
}
}

View File

@ -70,12 +70,12 @@ abstract class Lexer {
}
/**
* Returns the number of lines read
* Returns the current line number
*
* @return the number of EOLs seen so far
* @return the current line number
*/
long getLineNumber() {
return in.getLineNumber();
long getCurrentLineNumber() {
return in.getCurrentLineNumber();
}
// TODO escape handling needs more work

View File

@ -198,7 +198,7 @@ class CSVLexer1 extends Lexer {
*/
private Token encapsulatedTokenLexer(final Token tkn, int c) throws IOException {
// save current line
final long startLineNumber = getLineNumber();
final long startLineNumber = getCurrentLineNumber();
// ignore the given delimiter
// assert c == delimiter;
while (true) {
@ -230,7 +230,7 @@ class CSVLexer1 extends Lexer {
return tkn;
} else if (!isWhitespace(c)) {
// error invalid char between token and next delimiter
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
}
}
}

View File

@ -187,7 +187,7 @@ class CSVLexer1306663 extends Lexer {
*/
private Token encapsulatedTokenLexer(final Token tkn) throws IOException {
// save current line
final long startLineNumber = getLineNumber();
final long startLineNumber = getCurrentLineNumber();
// ignore the given delimiter
// assert c == delimiter;
int c;
@ -218,7 +218,7 @@ class CSVLexer1306663 extends Lexer {
return tkn;
} else if (!isWhitespace(c)) {
// error invalid char between token and next delimiter
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
}
}
}

View File

@ -187,7 +187,7 @@ class CSVLexer1306667 extends Lexer {
*/
private Token encapsulatedTokenLexer(final Token tkn) throws IOException {
// save current line
final long startLineNumber = getLineNumber();
final long startLineNumber = getCurrentLineNumber();
// ignore the given delimiter
// assert c == delimiter;
int c;
@ -218,7 +218,7 @@ class CSVLexer1306667 extends Lexer {
return tkn;
} else if (!isWhitespace(c)) {
// error invalid char between token and next delimiter
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
}
}
}

View File

@ -170,7 +170,7 @@ class CSVLexer3 extends Lexer {
state = State.ESCAPE_QUOTE;
break;
case EOFCHAR:
throw new IOException("(line " + getLineNumber() + ") unexpected EOF in quoted string");
throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in quoted string");
default:
tkn.content.append((char) intch);
break;
@ -194,7 +194,7 @@ class CSVLexer3 extends Lexer {
case WHITESPACE: // trailing whitespace may be allowed
if (!ignoreSurroundingSpaces) {
// error invalid char between token and next delimiter
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
}
break;
// Everything else is invalid
@ -202,7 +202,7 @@ class CSVLexer3 extends Lexer {
case OTHER:
case COMMENT_START:
// error invalid char between token and next delimiter
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
}
break;
case ESCAPE_PLAIN:
@ -221,7 +221,7 @@ class CSVLexer3 extends Lexer {
tkn.content.append((char) intch);
break;
case EOFCHAR:
throw new IOException("(line " + getLineNumber() + ") unexpected EOF in escape sequence");
throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in escape sequence");
}
break;
case ESCAPE_QUOTE:
@ -239,7 +239,7 @@ class CSVLexer3 extends Lexer {
tkn.content.append((char) intch);
break;
case EOFCHAR:
throw new IOException("(line " + getLineNumber() + ") unexpected EOF in escape sequence");
throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in escape sequence");
}
break;
default:

View File

@ -629,21 +629,21 @@ public class CSVParserTest {
CSVFormat.newBuilder().withRecordSeparator(CRLF).build());
CSVRecord record;
assertEquals(0, parser.getRecordNumber());
assertEquals(0, parser.getLineNumber());
assertEquals(0, parser.getCurrentLineNumber());
assertNotNull(record = parser.nextRecord());
assertEquals(3, parser.getLineNumber());
assertEquals(3, parser.getCurrentLineNumber());
assertEquals(1, record.getRecordNumber());
assertEquals(1, parser.getRecordNumber());
assertNotNull(record = parser.nextRecord());
assertEquals(6, parser.getLineNumber());
assertEquals(6, parser.getCurrentLineNumber());
assertEquals(2, record.getRecordNumber());
assertEquals(2, parser.getRecordNumber());
assertNotNull(record = parser.nextRecord());
assertEquals(8, parser.getLineNumber());
assertEquals(8, parser.getCurrentLineNumber());
assertEquals(3, record.getRecordNumber());
assertEquals(3, parser.getRecordNumber());
assertNull(record = parser.nextRecord());
assertEquals(8, parser.getLineNumber());
assertEquals(8, parser.getCurrentLineNumber());
assertEquals(3, parser.getRecordNumber());
}
@ -676,17 +676,17 @@ public class CSVParserTest {
private void validateLineNumbers(final String lineSeparator) throws IOException {
final CSVParser parser = new CSVParser("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.newBuilder().withRecordSeparator(lineSeparator).build());
assertEquals(0, parser.getLineNumber());
assertEquals(0, parser.getCurrentLineNumber());
assertNotNull(parser.nextRecord());
assertEquals(1, parser.getLineNumber());
assertEquals(1, parser.getCurrentLineNumber());
assertNotNull(parser.nextRecord());
assertEquals(2, parser.getLineNumber());
assertEquals(2, parser.getCurrentLineNumber());
assertNotNull(parser.nextRecord());
// Still 2 because the last line does not have EOL chars
assertEquals(2, parser.getLineNumber());
assertEquals(2, parser.getCurrentLineNumber());
assertNull(parser.nextRecord());
// Still 2 because the last line does not have EOL chars
assertEquals(2, parser.getLineNumber());
assertEquals(2, parser.getCurrentLineNumber());
}
}

View File

@ -47,44 +47,50 @@ public class ExtendedBufferedReaderTest {
@Test
public void testReadLookahead1() throws Exception {
final ExtendedBufferedReader br = getBufferedReader("1\n2\r3\n");
assertEquals(0, br.getCurrentLineNumber());
assertEquals('1', br.lookAhead());
assertEquals(UNDEFINED, br.getLastChar());
assertEquals('1', br.read());
assertEquals(0, br.getCurrentLineNumber());
assertEquals('1', br.read()); // Start line 1
assertEquals('1', br.getLastChar());
assertEquals(0, br.getLineNumber());
assertEquals(1, br.getCurrentLineNumber());
assertEquals('\n', br.lookAhead());
assertEquals(0, br.getLineNumber());
assertEquals(1, br.getCurrentLineNumber());
assertEquals('1', br.getLastChar());
assertEquals('\n', br.read());
assertEquals(1, br.getLineNumber());
assertEquals(1, br.getCurrentLineNumber());
assertEquals('\n', br.getLastChar());
assertEquals(1, br.getLineNumber());
assertEquals(1, br.getCurrentLineNumber());
assertEquals('2', br.lookAhead());
assertEquals(1, br.getLineNumber());
assertEquals(1, br.getCurrentLineNumber());
assertEquals('\n', br.getLastChar());
assertEquals(1, br.getLineNumber());
assertEquals('2', br.read());
assertEquals(1, br.getCurrentLineNumber());
assertEquals('2', br.read()); // Start line 2
assertEquals(2, br.getCurrentLineNumber());
assertEquals('2', br.getLastChar());
assertEquals('\r', br.lookAhead());
assertEquals(2, br.getCurrentLineNumber());
assertEquals('2', br.getLastChar());
assertEquals('\r', br.read());
assertEquals('\r', br.getLastChar());
assertEquals(2, br.getCurrentLineNumber());
assertEquals('3', br.lookAhead());
assertEquals('\r', br.getLastChar());
assertEquals('3', br.read());
assertEquals('3', br.read()); // Start line 3
assertEquals('3', br.getLastChar());
assertEquals(3, br.getCurrentLineNumber());
assertEquals('\n', br.lookAhead());
assertEquals(2, br.getLineNumber());
assertEquals(3, br.getCurrentLineNumber());
assertEquals('3', br.getLastChar());
assertEquals('\n', br.read());
assertEquals(3, br.getLineNumber());
assertEquals(3, br.getCurrentLineNumber());
assertEquals('\n', br.getLastChar());
assertEquals(3, br.getLineNumber());
assertEquals(3, br.getCurrentLineNumber());
assertEquals(END_OF_STREAM, br.lookAhead());
assertEquals('\n', br.getLastChar());
@ -92,6 +98,7 @@ public class ExtendedBufferedReaderTest {
assertEquals(END_OF_STREAM, br.getLastChar());
assertEquals(END_OF_STREAM, br.read());
assertEquals(END_OF_STREAM, br.lookAhead());
assertEquals(3, br.getCurrentLineNumber());
}
@ -125,28 +132,28 @@ public class ExtendedBufferedReaderTest {
assertNull(br.readLine());
br = getBufferedReader("foo\n\nhello");
assertEquals(0, br.getLineNumber());
assertEquals(0, br.getCurrentLineNumber());
assertEquals("foo",br.readLine());
assertEquals(1, br.getLineNumber());
assertEquals(1, br.getCurrentLineNumber());
assertEquals("",br.readLine());
assertEquals(2, br.getLineNumber());
assertEquals(2, br.getCurrentLineNumber());
assertEquals("hello",br.readLine());
assertEquals(3, br.getLineNumber());
assertEquals(3, br.getCurrentLineNumber());
assertNull(br.readLine());
assertEquals(3, br.getLineNumber());
assertEquals(3, br.getCurrentLineNumber());
br = getBufferedReader("foo\n\nhello");
assertEquals('f', br.read());
assertEquals('o', br.lookAhead());
assertEquals("oo",br.readLine());
assertEquals(1, br.getLineNumber());
assertEquals(1, br.getCurrentLineNumber());
assertEquals('\n', br.lookAhead());
assertEquals("",br.readLine());
assertEquals(2, br.getLineNumber());
assertEquals(2, br.getCurrentLineNumber());
assertEquals('h', br.lookAhead());
assertEquals("hello",br.readLine());
assertNull(br.readLine());
assertEquals(3, br.getLineNumber());
assertEquals(3, br.getCurrentLineNumber());
br = getBufferedReader("foo\rbaar\r\nfoo");
@ -171,20 +178,20 @@ public class ExtendedBufferedReaderTest {
ExtendedBufferedReader br;
br = getBufferedReader(test);
assertEquals(0, br.getLineNumber());
assertEquals(0, br.getCurrentLineNumber());
while(br.readLine()!=null) {}
assertEquals(EOLeolct, br.getLineNumber());
assertEquals(EOLeolct, br.getCurrentLineNumber());
br = getBufferedReader(test);
assertEquals(0, br.getLineNumber());
assertEquals(0, br.getCurrentLineNumber());
while(br.read()!=-1) {}
assertEquals(EOLeolct, br.getLineNumber());
assertEquals(EOLeolct, br.getCurrentLineNumber());
br = getBufferedReader(test);
assertEquals(0, br.getLineNumber());
assertEquals(0, br.getCurrentLineNumber());
final char[] buff = new char[10];
while(br.read(buff ,0, 3)!=-1) {}
assertEquals(EOLeolct, br.getLineNumber());
assertEquals(EOLeolct, br.getCurrentLineNumber());
}
private ExtendedBufferedReader getBufferedReader(final String s) {