Improve escape tests

Fix bug in readEscape() - was not handling EOF

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1306890 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sebastian Bazley 2012-03-29 15:01:04 +00:00
parent 5c9697c158
commit a07c9f72fc
2 changed files with 46 additions and 5 deletions

View File

@ -60,6 +60,7 @@ abstract class Lexer {
return in.getLineNumber();
}
// TODO escape handling needs more work
int readEscape() throws IOException {
// assume c is the escape char (normally a backslash)
int c = in.read();
@ -74,6 +75,8 @@ abstract class Lexer {
return '\b';
case 'f':
return '\f';
case ExtendedBufferedReader.END_OF_STREAM:
throw new IOException("EOF whilst processing escape sequence");
default:
return c;
}

View File

@ -147,28 +147,66 @@ public class CSVLexerTest {
}
// simple token with escaping
// simple token with escaping not enabled
@Test
public void testNextToken3() throws IOException {
/* file: a,\,,b
* \,,
*/
String code = "a,\\,,b\n\\,,";
CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#');
String code = "a,\\,,b\\\n\\,,";
CSVFormat format = CSVFormat.DEFAULT;
assertFalse(format.isEscaping());
Lexer parser = getLexer(code, format);
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
// an unquoted single backslash is not an escape char
assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));
assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));
assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));
assertTokenEquals(EORECORD, "b\\", parser.nextToken(new Token()));
// an unquoted single backslash is not an escape char
assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));
assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));
assertTokenEquals(EOF, "", parser.nextToken(new Token()));
}
// encapsulator tokenizer (sinle line)
// escaping enabled: the escape char turns delimiters and line feeds into token content
@Test
public void testNextToken3Escaping() throws IOException {
    /* file: a,\,,b\\
     *       \,,\
     *       c,d\
     * (each of the three lines, including the last, is followed by a line feed)
     */
    final CSVFormat format = CSVFormat.DEFAULT.withEscape('\\');
    assertTrue(format.isEscaping());
    final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\n";
    final Lexer lexer = getLexer(code, format);

    assertTokenEquals(TOKEN, "a", lexer.nextToken(new Token()));
    // an escaped delimiter is expected to come back as the token's content
    assertTokenEquals(TOKEN, ",", lexer.nextToken(new Token()));
    // a doubled escape char yields one backslash; the plain line feed after it ends the record
    assertTokenEquals(EORECORD, "b\\", lexer.nextToken(new Token()));
    assertTokenEquals(TOKEN, ",", lexer.nextToken(new Token()));
    // an escaped line feed stays inside the token instead of ending the record
    assertTokenEquals(TOKEN, "\nc", lexer.nextToken(new Token()));
    // the trailing escaped line feed is part of the last token; the input ends right after it
    assertTokenEquals(EOF, "d\n", lexer.nextToken(new Token()));
    assertTokenEquals(EOF, "", lexer.nextToken(new Token()));
}
// escaping enabled: an escape char that is the very last char of the input must be rejected
@Test
public void testNextToken3BadEscaping() throws IOException {
    // the input ends immediately after the escape char, so there is nothing left to escape
    final String code = "a,b,c\\";
    final CSVFormat format = CSVFormat.DEFAULT.withEscape('\\');
    assertTrue(format.isEscaping());
    final Lexer parser = getLexer(code, format);
    assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
    assertTokenEquals(TOKEN, "b", parser.nextToken(new Token()));
    try {
        // reading the third token runs into the dangling escape char
        final Token tkn = parser.nextToken(new Token());
        fail("Expected IOE, found "+tkn);
    } catch (IOException expected) {
        // intentionally ignored: the IOException is the outcome this test is checking for
    }
}
// encapsulator tokenizer (single line)
@Test
public void testNextToken4() throws IOException {
/* file: a,"foo",b