Package-private classes are not prefixed with "CSV": CSVLexer -> Lexer.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1511462 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2013-08-07 20:00:26 +00:00
parent 643b628af4
commit 65f6f1dfe8
4 changed files with 36 additions and 36 deletions

View File

@ -217,7 +217,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
private final CSVFormat format; private final CSVFormat format;
private final Map<String, Integer> headerMap; private final Map<String, Integer> headerMap;
private final CSVLexer lexer; private final Lexer lexer;
/** A record buffer for getRecord(). Grows as necessary and is reused. */ /** A record buffer for getRecord(). Grows as necessary and is reused. */
private final List<String> record = new ArrayList<String>(); private final List<String> record = new ArrayList<String>();
@ -265,7 +265,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
public CSVParser(final Reader reader, final CSVFormat format) throws IOException { public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
format.validate(); format.validate();
this.format = format; this.format = format;
this.lexer = new CSVLexer(format, new ExtendedBufferedReader(reader)); this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
this.headerMap = this.initializeHeader(); this.headerMap = this.initializeHeader();
} }

View File

@ -37,7 +37,7 @@ import java.io.IOException;
* *
* @version $Id$ * @version $Id$
*/ */
final class CSVLexer { final class Lexer {
/** /**
* Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it * Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it
@ -58,7 +58,7 @@ final class CSVLexer {
private final ExtendedBufferedReader in; private final ExtendedBufferedReader in;
/** INTERNAL API. but ctor needs to be called dynamically by PerformanceTest class */ /** INTERNAL API. but ctor needs to be called dynamically by PerformanceTest class */
CSVLexer(final CSVFormat format, final ExtendedBufferedReader in) { Lexer(final CSVFormat format, final ExtendedBufferedReader in) {
this.in = in; this.in = in;
this.delimiter = format.getDelimiter(); this.delimiter = format.getDelimiter();
this.escape = mapNullToDisabled(format.getEscape()); this.escape = mapNullToDisabled(format.getEscape());

View File

@ -52,14 +52,14 @@ public class CSVLexerTest {
formatWithEscaping = CSVFormat.DEFAULT.withEscape('\\'); formatWithEscaping = CSVFormat.DEFAULT.withEscape('\\');
} }
private CSVLexer getLexer(final String input, final CSVFormat format) { private Lexer getLexer(final String input, final CSVFormat format) {
return new CSVLexer(format, new ExtendedBufferedReader(new StringReader(input))); return new Lexer(format, new ExtendedBufferedReader(new StringReader(input)));
} }
@Test @Test
public void testSurroundingSpacesAreDeleted() throws IOException { public void testSurroundingSpacesAreDeleted() throws IOException {
final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,"; final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,";
final CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true)); final Lexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces"));
@ -72,7 +72,7 @@ public class CSVLexerTest {
@Test @Test
public void testSurroundingTabsAreDeleted() throws IOException { public void testSurroundingTabsAreDeleted() throws IOException {
final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,"; final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,";
final CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true)); final Lexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab"));
@ -99,7 +99,7 @@ public class CSVLexerTest {
"\n"+ "\n"+
"\n"; "\n";
final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines(true); final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines(true);
final CSVLexer parser = getLexer(code, format); final Lexer parser = getLexer(code, format);
assertThat(parser.nextToken(new Token()), matches(TOKEN, "first")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "first"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
@ -123,7 +123,7 @@ public class CSVLexerTest {
"# penultimate comment\n"+ "# penultimate comment\n"+
"# Final comment\n"; "# Final comment\n";
final CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#'); final CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#');
final CSVLexer parser = getLexer(code, format); final Lexer parser = getLexer(code, format);
assertThat(parser.nextToken(new Token()), matches(TOKEN, "first")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "first"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "line")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
@ -161,7 +161,7 @@ public class CSVLexerTest {
final CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#').withIgnoreEmptyLines(false); final CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#').withIgnoreEmptyLines(false);
assertFalse("Should not ignore empty lines", format.getIgnoreEmptyLines()); assertFalse("Should not ignore empty lines", format.getIgnoreEmptyLines());
final CSVLexer parser = getLexer(code, format); final Lexer parser = getLexer(code, format);
assertThat(parser.nextToken(new Token()), matches(TOKEN, "1")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "1"));
@ -199,7 +199,7 @@ public class CSVLexerTest {
final String code = "a,\\,,b\\\n\\,,"; final String code = "a,\\,,b\\\n\\,,";
final CSVFormat format = CSVFormat.DEFAULT; final CSVFormat format = CSVFormat.DEFAULT;
assertFalse(format.isEscaping()); assertFalse(format.isEscaping());
final CSVLexer parser = getLexer(code, format); final Lexer parser = getLexer(code, format);
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
// an unquoted single backslash is not an escape char // an unquoted single backslash is not an escape char
@ -221,7 +221,7 @@ public class CSVLexerTest {
final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne"; final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne";
final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false); final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false);
assertTrue(format.isEscaping()); assertTrue(format.isEscaping());
final CSVLexer parser = getLexer(code, format); final Lexer parser = getLexer(code, format);
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ",")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
@ -241,7 +241,7 @@ public class CSVLexerTest {
* a, " foo " ,b * a, " foo " ,b
*/ */
final String code = "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b"; final String code = "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
final CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true)); final Lexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "b"));
@ -261,7 +261,7 @@ public class CSVLexerTest {
@Test @Test
public void testNextToken5() throws IOException { public void testNextToken5() throws IOException {
final String code = "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\""; final String code = "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";
final CSVLexer parser = getLexer(code, CSVFormat.DEFAULT); final Lexer parser = getLexer(code, CSVFormat.DEFAULT);
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo\n")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo\n"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "b"));
@ -280,7 +280,7 @@ public class CSVLexerTest {
*/ */
final String code = "a;'b and '' more\n'\n!comment;;;;\n;;"; final String code = "a;'b and '' more\n'\n!comment;;;;\n;;";
final CSVFormat format = CSVFormat.DEFAULT.withQuoteChar('\'').withCommentStart('!').withDelimiter(';'); final CSVFormat format = CSVFormat.DEFAULT.withQuoteChar('\'').withCommentStart('!').withDelimiter(';');
final CSVLexer parser = getLexer(code, format); final Lexer parser = getLexer(code, format);
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b and ' more\n")); assertThat(parser.nextToken(new Token()), matches(EORECORD, "b and ' more\n"));
} }
@ -289,7 +289,7 @@ public class CSVLexerTest {
@Test @Test
public void testDelimiterIsWhitespace() throws IOException { public void testDelimiterIsWhitespace() throws IOException {
final String code = "one\ttwo\t\tfour \t five\t six"; final String code = "one\ttwo\t\tfour \t five\t six";
final CSVLexer parser = getLexer(code, CSVFormat.TDF); final Lexer parser = getLexer(code, CSVFormat.TDF);
assertThat(parser.nextToken(new Token()), matches(TOKEN, "one")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "one"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "two")); assertThat(parser.nextToken(new Token()), matches(TOKEN, "two"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "")); assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
@ -300,96 +300,96 @@ public class CSVLexerTest {
@Test @Test
public void testEscapedCR() throws Exception { public void testEscapedCR() throws Exception {
final CSVLexer lexer = getLexer("character\\" + CR + "Escaped", formatWithEscaping); final Lexer lexer = getLexer("character\\" + CR + "Escaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
} }
@Test @Test
public void testCR() throws Exception { public void testCR() throws Exception {
final CSVLexer lexer = getLexer("character" + CR + "NotEscaped", formatWithEscaping); final Lexer lexer = getLexer("character" + CR + "NotEscaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character")); assertThat(lexer.nextToken(new Token()), hasContent("character"));
assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
} }
@Test @Test
public void testEscapedLF() throws Exception { public void testEscapedLF() throws Exception {
final CSVLexer lexer = getLexer("character\\" + LF + "Escaped", formatWithEscaping); final Lexer lexer = getLexer("character\\" + LF + "Escaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped"));
} }
@Test @Test
public void testLF() throws Exception { public void testLF() throws Exception {
final CSVLexer lexer = getLexer("character" + LF + "NotEscaped", formatWithEscaping); final Lexer lexer = getLexer("character" + LF + "NotEscaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character")); assertThat(lexer.nextToken(new Token()), hasContent("character"));
assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
} }
@Test // TODO is this correct? Do we expect <esc>TAB to be unescaped? @Test // TODO is this correct? Do we expect <esc>TAB to be unescaped?
public void testEscapedTab() throws Exception { public void testEscapedTab() throws Exception {
final CSVLexer lexer = getLexer("character\\" + TAB + "Escaped", formatWithEscaping); final Lexer lexer = getLexer("character\\" + TAB + "Escaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped"));
} }
@Test @Test
public void testTab() throws Exception { public void testTab() throws Exception {
final CSVLexer lexer = getLexer("character" + TAB + "NotEscaped", formatWithEscaping); final Lexer lexer = getLexer("character" + TAB + "NotEscaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped"));
} }
@Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped? @Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped?
public void testEscapedBackspace() throws Exception { public void testEscapedBackspace() throws Exception {
final CSVLexer lexer = getLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping); final Lexer lexer = getLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped"));
} }
@Test @Test
public void testBackspace() throws Exception { public void testBackspace() throws Exception {
final CSVLexer lexer = getLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping); final Lexer lexer = getLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped"));
} }
@Test // TODO is this correct? Do we expect <esc>FF to be unescaped? @Test // TODO is this correct? Do we expect <esc>FF to be unescaped?
public void testEscapedFF() throws Exception { public void testEscapedFF() throws Exception {
final CSVLexer lexer = getLexer("character\\" + FF + "Escaped", formatWithEscaping); final Lexer lexer = getLexer("character\\" + FF + "Escaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped"));
} }
@Test @Test
public void testFF() throws Exception { public void testFF() throws Exception {
final CSVLexer lexer = getLexer("character" + FF + "NotEscaped", formatWithEscaping); final Lexer lexer = getLexer("character" + FF + "NotEscaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped"));
} }
@Test @Test
public void testEscapedMySqlNullValue() throws Exception { public void testEscapedMySqlNullValue() throws Exception {
// MySQL uses \N to symbolize null values. We have to restore this // MySQL uses \N to symbolize null values. We have to restore this
final CSVLexer lexer = getLexer("character\\NEscaped", formatWithEscaping); final Lexer lexer = getLexer("character\\NEscaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped"));
} }
@Test @Test
public void testEscapedCharacter() throws Exception { public void testEscapedCharacter() throws Exception {
final CSVLexer lexer = getLexer("character\\aEscaped", formatWithEscaping); final Lexer lexer = getLexer("character\\aEscaped", formatWithEscaping);
assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped")); assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped"));
} }
@Test @Test
public void testEscapedControlCharacter() throws Exception { public void testEscapedControlCharacter() throws Exception {
// we are explicitly using an escape different from \ here // we are explicitly using an escape different from \ here
final CSVLexer lexer = getLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!')); final Lexer lexer = getLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'));
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
} }
@Test @Test
public void testEscapedControlCharacter2() throws Exception { public void testEscapedControlCharacter2() throws Exception {
final CSVLexer lexer = getLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\')); final Lexer lexer = getLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'));
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped")); assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
} }
@Test(expected = IOException.class) @Test(expected = IOException.class)
public void testEscapingAtEOF() throws Exception { public void testEscapingAtEOF() throws Exception {
final String code = "escaping at EOF is evil\\"; final String code = "escaping at EOF is evil\\";
final CSVLexer lexer = getLexer(code, formatWithEscaping); final Lexer lexer = getLexer(code, formatWithEscaping);
lexer.nextToken(new Token()); lexer.nextToken(new Token());
} }

View File

@ -224,9 +224,9 @@ public class PerformanceTest {
} }
private static Constructor<CSVLexer> getLexerCtor(final String clazz) throws Exception { private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
final Class<CSVLexer> lexer = (Class<CSVLexer>) Class.forName("org.apache.commons.csv." + clazz); final Class<Lexer> lexer = (Class<Lexer>) Class.forName("org.apache.commons.csv." + clazz);
return lexer.getConstructor(new Class<?>[]{CSVFormat.class, ExtendedBufferedReader.class}); return lexer.getConstructor(new Class<?>[]{CSVFormat.class, ExtendedBufferedReader.class});
} }
@ -235,12 +235,12 @@ public class PerformanceTest {
String dynamic = ""; String dynamic = "";
for (int i = 0; i < max; i++) { for (int i = 0; i < max; i++) {
final ExtendedBufferedReader input = new ExtendedBufferedReader(getReader()); final ExtendedBufferedReader input = new ExtendedBufferedReader(getReader());
CSVLexer lexer = null; Lexer lexer = null;
if (test.startsWith("CSVLexer")) { if (test.startsWith("CSVLexer")) {
dynamic="!"; dynamic="!";
lexer = getLexerCtor(test).newInstance(new Object[]{format, input}); lexer = getLexerCtor(test).newInstance(new Object[]{format, input});
} else { } else {
lexer = new CSVLexer(format, input); lexer = new Lexer(format, input);
} }
int count = 0; int count = 0;
int fields = 0; int fields = 0;