Improve escape tests
Fix bug in readEscape() - was not handling EOF
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1306890 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5c9697c158
commit
a07c9f72fc
|
@ -60,6 +60,7 @@ abstract class Lexer {
|
|||
return in.getLineNumber();
|
||||
}
|
||||
|
||||
// TODO escape handling needs more work
|
||||
int readEscape() throws IOException {
|
||||
// the escape char (normally a backslash) has just been read; c is the character that follows it
|
||||
int c = in.read();
|
||||
|
@ -74,6 +75,8 @@ abstract class Lexer {
|
|||
return '\b';
|
||||
case 'f':
|
||||
return '\f';
|
||||
case ExtendedBufferedReader.END_OF_STREAM:
|
||||
throw new IOException("EOF whilst processing escape sequence");
|
||||
default:
|
||||
return c;
|
||||
}
|
||||
|
|
|
@ -147,28 +147,66 @@ public class CSVLexerTest {
|
|||
|
||||
}
|
||||
|
||||
// simple token with escaping
|
||||
// simple token with escaping not enabled
|
||||
@Test
|
||||
public void testNextToken3() throws IOException {
|
||||
/* file: a,\,,b
|
||||
* \,,
|
||||
*/
|
||||
String code = "a,\\,,b\n\\,,";
|
||||
CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#');
|
||||
String code = "a,\\,,b\\\n\\,,";
|
||||
CSVFormat format = CSVFormat.DEFAULT;
|
||||
assertFalse(format.isEscaping());
|
||||
Lexer parser = getLexer(code, format);
|
||||
|
||||
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
|
||||
// an unquoted single backslash is not an escape char
|
||||
assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));
|
||||
assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));
|
||||
assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));
|
||||
assertTokenEquals(EORECORD, "b\\", parser.nextToken(new Token()));
|
||||
// an unquoted single backslash is not an escape char
|
||||
assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));
|
||||
assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));
|
||||
assertTokenEquals(EOF, "", parser.nextToken(new Token()));
|
||||
}
|
||||
|
||||
// encapsulator tokenizer (sinle line)
|
||||
// simple token with escaping enabled
|
||||
@Test
|
||||
public void testNextToken3Escaping() throws IOException {
|
||||
/* file: a,\,,b\\
|
||||
* \,,\
|
||||
* c,d\
|
||||
*/
|
||||
String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\n";
|
||||
CSVFormat format = CSVFormat.DEFAULT.withEscape('\\');
|
||||
assertTrue(format.isEscaping());
|
||||
Lexer parser = getLexer(code, format);
|
||||
|
||||
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
|
||||
assertTokenEquals(TOKEN, ",", parser.nextToken(new Token()));
|
||||
assertTokenEquals(EORECORD, "b\\", parser.nextToken(new Token()));
|
||||
assertTokenEquals(TOKEN, ",", parser.nextToken(new Token()));
|
||||
assertTokenEquals(TOKEN, "\nc", parser.nextToken(new Token()));
|
||||
assertTokenEquals(EOF, "d\n", parser.nextToken(new Token()));
|
||||
assertTokenEquals(EOF, "", parser.nextToken(new Token()));
|
||||
}
|
||||
|
||||
// escaping enabled, but the input ends right after the escape char: an IOException is expected
|
||||
@Test
|
||||
public void testNextToken3BadEscaping() throws IOException {
|
||||
String code = "a,b,c\\";
|
||||
CSVFormat format = CSVFormat.DEFAULT.withEscape('\\');
|
||||
assertTrue(format.isEscaping());
|
||||
Lexer parser = getLexer(code, format);
|
||||
|
||||
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
|
||||
assertTokenEquals(TOKEN, "b", parser.nextToken(new Token()));
|
||||
try {
|
||||
Token tkn = parser.nextToken(new Token());
|
||||
fail("Expected IOE, found "+tkn);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
|
||||
// encapsulator tokenizer (single line)
|
||||
@Test
|
||||
public void testNextToken4() throws IOException {
|
||||
/* file: a,"foo",b
|
||||
|
|
Loading…
Reference in New Issue