Improve escape tests
Fix bug in readEscape() - was not handling EOF

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1306890 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5c9697c158
commit
a07c9f72fc
|
@ -60,6 +60,7 @@ abstract class Lexer {
|
||||||
return in.getLineNumber();
|
return in.getLineNumber();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO escape handling needs more work
|
||||||
int readEscape() throws IOException {
|
int readEscape() throws IOException {
|
||||||
// assume c is the escape char (normally a backslash)
|
// assume c is the escape char (normally a backslash)
|
||||||
int c = in.read();
|
int c = in.read();
|
||||||
|
@ -74,6 +75,8 @@ abstract class Lexer {
|
||||||
return '\b';
|
return '\b';
|
||||||
case 'f':
|
case 'f':
|
||||||
return '\f';
|
return '\f';
|
||||||
|
case ExtendedBufferedReader.END_OF_STREAM:
|
||||||
|
throw new IOException("EOF whilst processing escape sequence");
|
||||||
default:
|
default:
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
|
@ -147,28 +147,66 @@ public class CSVLexerTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// simple token with escaping
|
// simple token with escaping not enabled
|
||||||
@Test
|
@Test
|
||||||
public void testNextToken3() throws IOException {
|
public void testNextToken3() throws IOException {
|
||||||
/* file: a,\,,b
|
/* file: a,\,,b
|
||||||
* \,,
|
* \,,
|
||||||
*/
|
*/
|
||||||
String code = "a,\\,,b\n\\,,";
|
String code = "a,\\,,b\\\n\\,,";
|
||||||
CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#');
|
CSVFormat format = CSVFormat.DEFAULT;
|
||||||
|
assertFalse(format.isEscaping());
|
||||||
Lexer parser = getLexer(code, format);
|
Lexer parser = getLexer(code, format);
|
||||||
|
|
||||||
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
|
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
|
||||||
// an unquoted single backslash is not an escape char
|
// an unquoted single backslash is not an escape char
|
||||||
assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));
|
assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));
|
||||||
assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));
|
assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));
|
||||||
assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));
|
assertTokenEquals(EORECORD, "b\\", parser.nextToken(new Token()));
|
||||||
// an unquoted single backslash is not an escape char
|
// an unquoted single backslash is not an escape char
|
||||||
assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));
|
assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));
|
||||||
assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));
|
assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));
|
||||||
assertTokenEquals(EOF, "", parser.nextToken(new Token()));
|
assertTokenEquals(EOF, "", parser.nextToken(new Token()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// encapsulator tokenizer (sinle line)
|
// simple token with escaping enabled
|
||||||
|
@Test
|
||||||
|
public void testNextToken3Escaping() throws IOException {
|
||||||
|
/* file: a,\,,b
|
||||||
|
* \,,
|
||||||
|
*/
|
||||||
|
String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\n";
|
||||||
|
CSVFormat format = CSVFormat.DEFAULT.withEscape('\\');
|
||||||
|
assertTrue(format.isEscaping());
|
||||||
|
Lexer parser = getLexer(code, format);
|
||||||
|
|
||||||
|
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
|
||||||
|
assertTokenEquals(TOKEN, ",", parser.nextToken(new Token()));
|
||||||
|
assertTokenEquals(EORECORD, "b\\", parser.nextToken(new Token()));
|
||||||
|
assertTokenEquals(TOKEN, ",", parser.nextToken(new Token()));
|
||||||
|
assertTokenEquals(TOKEN, "\nc", parser.nextToken(new Token()));
|
||||||
|
assertTokenEquals(EOF, "d\n", parser.nextToken(new Token()));
|
||||||
|
assertTokenEquals(EOF, "", parser.nextToken(new Token()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// escaping enabled: an escape char immediately before EOF must raise an IOException
|
||||||
|
@Test
|
||||||
|
public void testNextToken3BadEscaping() throws IOException {
|
||||||
|
String code = "a,b,c\\";
|
||||||
|
CSVFormat format = CSVFormat.DEFAULT.withEscape('\\');
|
||||||
|
assertTrue(format.isEscaping());
|
||||||
|
Lexer parser = getLexer(code, format);
|
||||||
|
|
||||||
|
assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));
|
||||||
|
assertTokenEquals(TOKEN, "b", parser.nextToken(new Token()));
|
||||||
|
try {
|
||||||
|
Token tkn = parser.nextToken(new Token());
|
||||||
|
fail("Expected IOE, found "+tkn);
|
||||||
|
} catch (IOException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// encapsulator tokenizer (single line)
|
||||||
@Test
|
@Test
|
||||||
public void testNextToken4() throws IOException {
|
public void testNextToken4() throws IOException {
|
||||||
/* file: a,"foo",b
|
/* file: a,"foo",b
|
||||||
|
|
Loading…
Reference in New Issue