Merge Lexer with CSVLexer

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1511006 13f79535-47bb-0310-9956-ffa450edef68
2025-02-06 01:59:07 +00:00 · 2013-08-06 15:44:41 +00:00 · 2013-08-06 15:44:41 +00:00 · 7b168ebbbe
commit 7b168ebbbe
parent 7755640784
5 changed files with 207 additions and 245 deletions
--- a/src/main/java/org/apache/commons/csv/CSVLexer.java
+++ b/src/main/java/org/apache/commons/csv/CSVLexer.java
@ -17,6 +17,13 @@

 package org.apache.commons.csv;

+import static org.apache.commons.csv.Constants.BACKSPACE;
+import static org.apache.commons.csv.Constants.CR;
+import static org.apache.commons.csv.Constants.END_OF_STREAM;
+import static org.apache.commons.csv.Constants.FF;
+import static org.apache.commons.csv.Constants.LF;
+import static org.apache.commons.csv.Constants.TAB;
+import static org.apache.commons.csv.Constants.UNDEFINED;
 import static org.apache.commons.csv.Token.Type.COMMENT;
 import static org.apache.commons.csv.Token.Type.EOF;
 import static org.apache.commons.csv.Token.Type.EORECORD;
@ -30,11 +37,38 @@ import java.io.IOException;
 *
 * @version $Id$
 */
-final class CSVLexer extends Lexer {
+final class CSVLexer {
+
+    /**
+     * Constant char to use for disabling comments, escapes and encapsulation. The value -2 (U+FFFE once narrowed to
+     * a char) is used because it won't be confused with an EOF signal (-1), and because the Unicode value
+     * {@code FFFE} is a noncharacter, and thus there should never be a collision with a real text char.
+     */
+    private static final char DISABLED = '\ufffe';
+
+    private final char delimiter;
+    private final char escape;
+    private final char quoteChar;
+    private final char commmentStart;
+
+    final boolean ignoreSurroundingSpaces;
+    final boolean ignoreEmptyLines;
+
+    final CSVFormat format;
+
+    /** The input stream */
+    final ExtendedBufferedReader in;

    /** INTERNAL API. ctor needs to be public so can be called dynamically by PerformanceTest class */
    CSVLexer(final CSVFormat format, final ExtendedBufferedReader in) {
-        super(format, in);
+        this.format = format;
+        this.in = in;
+        this.delimiter = format.getDelimiter();
+        this.escape = mapNullToDisabled(format.getEscape());
+        this.quoteChar = mapNullToDisabled(format.getQuoteChar());
+        this.commmentStart = mapNullToDisabled(format.getCommentStart());
+        this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
+        this.ignoreEmptyLines = format.getIgnoreEmptyLines();
    }

    /**
@ -48,7 +82,6 @@ final class CSVLexer extends Lexer {
     * @throws java.io.IOException
     *             on stream access error
     */
-    @Override
    Token nextToken(final Token token) throws IOException {

        // get the last read char (required for empty line detection)
@ -257,4 +290,144 @@ final class CSVLexer extends Lexer {
        }
    }

+    private final char mapNullToDisabled(final Character c) {
+        return c == null ? DISABLED : c.charValue();
+    }
+
+    /**
+     * Returns the current line number
+     *
+     * @return the current line number
+     */
+    long getCurrentLineNumber() {
+        return in.getCurrentLineNumber();
+    }
+
+    // TODO escape handling needs more work
+    /**
+     * Handle an escape sequence.
+     * The current character must be the escape character.
+     * On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()}
+     * on the input stream.
+     *
+     * @return the unescaped character (as an int) or {@link Constants#END_OF_STREAM} if char following the escape is invalid.
+     * @throws IOException if there is a problem reading the stream or the end of stream is detected:
+     * the escape character is not allowed at end of stream.
+     */
+    int readEscape() throws IOException {
+        // the escape char has just been read (normally a backslash)
+        final int ch = in.read();
+        switch (ch) {
+        case 'r':
+            return CR;
+        case 'n':
+            return LF;
+        case 't':
+            return TAB;
+        case 'b':
+            return BACKSPACE;
+        case 'f':
+            return FF;
+        case CR:
+        case LF:
+        case FF: // TODO is this correct?
+        case TAB: // TODO is this correct? Do tabs need to be escaped?
+        case BACKSPACE: // TODO is this correct?
+            return ch;
+        case END_OF_STREAM:
+            throw new IOException("EOF whilst processing escape sequence");
+        default:
+            // Now check for meta-characters
+            if (isMetaChar(ch)) {
+                return ch;
+            }
+            // indicate unexpected char - available from in.getLastChar()
+            return END_OF_STREAM;
+        }
+    }
+
+    void trimTrailingSpaces(final StringBuilder buffer) {
+        int length = buffer.length();
+        while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
+            length = length - 1;
+        }
+        if (length != buffer.length()) {
+            buffer.setLength(length);
+        }
+    }
+
+    /**
+     * Greedily accepts \n, \r and \r\n. This checker silently consumes the second control-character...
+     *
+     * @return true if the given or next character is a line-terminator
+     */
+    boolean readEndOfLine(int ch) throws IOException {
+        // check if we have \r\n...
+        if (ch == CR && in.lookAhead() == LF) {
+            // note: does not change ch outside of this method!
+            ch = in.read();
+        }
+        return ch == LF || ch == CR;
+    }
+
+    boolean isClosed() {
+        return in.isClosed();
+    }
+
+    /**
+     * @return true if the given char is a whitespace character
+     */
+    boolean isWhitespace(final int ch) {
+        return !isDelimiter(ch) && Character.isWhitespace((char) ch);
+    }
+
+    /**
+     * Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
+     *
+     * @param ch the character to check
+     * @return true if the character is at the start of a line.
+     */
+    boolean isStartOfLine(final int ch) {
+        return ch == LF || ch == CR || ch == UNDEFINED;
+    }
+
+    /**
+     * @return true if the given character indicates end of file
+     */
+    boolean isEndOfFile(final int ch) {
+        return ch == END_OF_STREAM;
+    }
+
+    boolean isDelimiter(final int ch) {
+        return ch == delimiter;
+    }
+
+    boolean isEscape(final int ch) {
+        return ch == escape;
+    }
+
+    boolean isQuoteChar(final int ch) {
+        return ch == quoteChar;
+    }
+
+    boolean isCommentStart(final int ch) {
+        return ch == commmentStart;
+    }
+
+    private boolean isMetaChar(final int ch) {
+        return ch == delimiter ||
+               ch == escape ||
+               ch == quoteChar ||
+               ch == commmentStart;
+    }
+
+    /**
+     * Closes resources.
+     *
+     * @throws IOException
+     *             If an I/O error occurs
+     */
+    void close() throws IOException {
+        in.close();
+    }
 }
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@ -217,7 +217,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
    private final CSVFormat format;
    private final Map<String, Integer> headerMap;

-    private final Lexer lexer;
+    private final CSVLexer lexer;

    /** A record buffer for getRecord(). Grows as necessary and is reused. */
    private final List<String> record = new ArrayList<String>();
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@ -1,211 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.commons.csv;
-
-import static org.apache.commons.csv.Constants.BACKSPACE;
-import static org.apache.commons.csv.Constants.CR;
-import static org.apache.commons.csv.Constants.END_OF_STREAM;
-import static org.apache.commons.csv.Constants.FF;
-import static org.apache.commons.csv.Constants.LF;
-import static org.apache.commons.csv.Constants.TAB;
-import static org.apache.commons.csv.Constants.UNDEFINED;
-
-import java.io.Closeable;
-import java.io.IOException;
-
-/**
- * Abstract lexer class; contains common utility routines shared by lexers
- *
- * @version $Id$
- */
-abstract class Lexer implements Closeable {
-
-    /**
-     * Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it
-     * won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
-     * chars (using surrogates) and thus there should never be a collision with a real text char.
-     */
-    private static final char DISABLED = '\ufffe';
-
-    private final char delimiter;
-    private final char escape;
-    private final char quoteChar;
-    private final char commmentStart;
-
-    final boolean ignoreSurroundingSpaces;
-    final boolean ignoreEmptyLines;
-
-    final CSVFormat format;
-
-    /** The input stream */
-    final ExtendedBufferedReader in;
-
-    Lexer(final CSVFormat format, final ExtendedBufferedReader in) {
-        this.format = format;
-        this.in = in;
-        this.delimiter = format.getDelimiter();
-        this.escape = mapNullToDisabled(format.getEscape());
-        this.quoteChar = mapNullToDisabled(format.getQuoteChar());
-        this.commmentStart = mapNullToDisabled(format.getCommentStart());
-        this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
-        this.ignoreEmptyLines = format.getIgnoreEmptyLines();
-    }
-
-    private final char mapNullToDisabled(final Character c) {
-        return c == null ? DISABLED : c.charValue();
-    }
-
-    /**
-     * Returns the current line number
-     *
-     * @return the current line number
-     */
-    long getCurrentLineNumber() {
-        return in.getCurrentLineNumber();
-    }
-
-    // TODO escape handling needs more work
-    /**
-     * Handle an escape sequence.
-     * The current character must be the escape character.
-     * On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()}
-     * on the input stream.
-     *
-     * @return the unescaped character (as an int) or {@link END_OF_STREAM} if char following the escape is invalid.
-     * @throws IOException if there is a problem reading the stream or the end of stream is detected:
-     * the escape character is not allowed at end of strem
-     */
-    int readEscape() throws IOException {
-        // the escape char has just been read (normally a backslash)
-        final int ch = in.read();
-        switch (ch) {
-        case 'r':
-            return CR;
-        case 'n':
-            return LF;
-        case 't':
-            return TAB;
-        case 'b':
-            return BACKSPACE;
-        case 'f':
-            return FF;
-        case CR:
-        case LF:
-        case FF: // TODO is this correct?
-        case TAB: // TODO is this correct? Do tabs need to be escaped?
-        case BACKSPACE: // TODO is this correct?
-            return ch;
-        case END_OF_STREAM:
-            throw new IOException("EOF whilst processing escape sequence");
-        default:
-            // Now check for meta-characters
-            if (isMetaChar(ch)) {
-                return ch;
-            }
-            // indicate unexpected char - available from in.getLastChar()
-            return END_OF_STREAM;
-        }
-    }
-
-    void trimTrailingSpaces(final StringBuilder buffer) {
-        int length = buffer.length();
-        while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
-            length = length - 1;
-        }
-        if (length != buffer.length()) {
-            buffer.setLength(length);
-        }
-    }
-
-    /**
-     * Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character...
-     *
-     * @return true if the given or next character is a line-terminator
-     */
-    boolean readEndOfLine(int ch) throws IOException {
-        // check if we have \r\n...
-        if (ch == CR && in.lookAhead() == LF) {
-            // note: does not change ch outside of this method!
-            ch = in.read();
-        }
-        return ch == LF || ch == CR;
-    }
-
-    abstract Token nextToken(Token reusableToken) throws IOException;
-
-    boolean isClosed() {
-    	return in.isClosed();
-    }
-
-    /**
-     * @return true if the given char is a whitespace character
-     */
-    boolean isWhitespace(final int ch) {
-        return !isDelimiter(ch) && Character.isWhitespace((char) ch);
-    }
-
-    /**
-     * Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
-     *
-     * @param ch the character to check
-     * @return true if the character is at the start of a line.
-     */
-    boolean isStartOfLine(final int ch) {
-        return ch == LF || ch == CR || ch == UNDEFINED;
-    }
-
-    /**
-     * @return true if the given character indicates end of file
-     */
-    boolean isEndOfFile(final int ch) {
-        return ch == END_OF_STREAM;
-    }
-
-    boolean isDelimiter(final int ch) {
-        return ch == delimiter;
-    }
-
-    boolean isEscape(final int ch) {
-        return ch == escape;
-    }
-
-    boolean isQuoteChar(final int ch) {
-        return ch == quoteChar;
-    }
-
-    boolean isCommentStart(final int ch) {
-        return ch == commmentStart;
-    }
-
-    private boolean isMetaChar(final int ch) {
-        return ch == delimiter ||
-               ch == escape ||
-               ch == quoteChar ||
-               ch == commmentStart;
-    }
-
-    /**
-     * Closes resources.
-     *
-	 * @throws IOException
-	 *             If an I/O error occurs
-     */
-	public void close() throws IOException {
-		in.close();
-	}
-}
--- a/src/test/java/org/apache/commons/csv/CSVLexerTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVLexerTest.java
@ -52,14 +52,14 @@ public class CSVLexerTest {
        formatWithEscaping = CSVFormat.DEFAULT.withEscape('\\');
    }

-    private Lexer getLexer(final String input, final CSVFormat format) {
+    private CSVLexer getLexer(final String input, final CSVFormat format) {
        return new CSVLexer(format, new ExtendedBufferedReader(new StringReader(input)));
    }

    @Test
    public void testSurroundingSpacesAreDeleted() throws IOException {
        final String code = "noSpaces,  leadingSpaces,trailingSpaces  ,  surroundingSpaces  ,  ,,";
-        final Lexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true));
+        final CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces"));
@ -72,7 +72,7 @@ public class CSVLexerTest {
    @Test
    public void testSurroundingTabsAreDeleted() throws IOException {
        final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,";
-        final Lexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true));
+        final CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab"));
@ -99,7 +99,7 @@ public class CSVLexerTest {
                "\n"+
                "\n";
        final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines(true);
-        final Lexer parser = getLexer(code, format);
+        final CSVLexer parser = getLexer(code, format);

        assertThat(parser.nextToken(new Token()), matches(TOKEN, "first"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
@ -123,7 +123,7 @@ public class CSVLexerTest {
                "# penultimate comment\n"+
                "# Final comment\n";
        final CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#');
-        final Lexer parser = getLexer(code, format);
+        final CSVLexer parser = getLexer(code, format);

        assertThat(parser.nextToken(new Token()), matches(TOKEN, "first"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
@ -161,7 +161,7 @@ public class CSVLexerTest {
        final CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#').withIgnoreEmptyLines(false);
        assertFalse("Should not ignore empty lines", format.getIgnoreEmptyLines());

-        final Lexer parser = getLexer(code, format);
+        final CSVLexer parser = getLexer(code, format);


        assertThat(parser.nextToken(new Token()), matches(TOKEN, "1"));
@ -199,7 +199,7 @@ public class CSVLexerTest {
        final String code = "a,\\,,b\\\n\\,,";
        final CSVFormat format = CSVFormat.DEFAULT;
        assertFalse(format.isEscaping());
-        final Lexer parser = getLexer(code, format);
+        final CSVLexer parser = getLexer(code, format);

        assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
        // an unquoted single backslash is not an escape char
@ -221,7 +221,7 @@ public class CSVLexerTest {
        final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne";
        final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false);
        assertTrue(format.isEscaping());
-        final Lexer parser = getLexer(code, format);
+        final CSVLexer parser = getLexer(code, format);

        assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
@ -241,7 +241,7 @@ public class CSVLexerTest {
        *        a,  " foo " ,b
        */
        final String code = "a,\"foo\",b\na,   \" foo\",b\na,\"foo \"  ,b\na,  \" foo \"  ,b";
-        final Lexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true));
+        final CSVLexer parser = getLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo"));
        assertThat(parser.nextToken(new Token()), matches(EORECORD, "b"));
@ -261,7 +261,7 @@ public class CSVLexerTest {
    @Test
    public void testNextToken5() throws IOException {
        final String code = "a,\"foo\n\",b\n\"foo\n  baar ,,,\"\n\"\n\t \n\"";
-        final Lexer parser = getLexer(code, CSVFormat.DEFAULT);
+        final CSVLexer parser = getLexer(code, CSVFormat.DEFAULT);
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "foo\n"));
        assertThat(parser.nextToken(new Token()), matches(EORECORD, "b"));
@ -280,7 +280,7 @@ public class CSVLexerTest {
        */
        final String code = "a;'b and '' more\n'\n!comment;;;;\n;;";
        final CSVFormat format = CSVFormat.DEFAULT.withQuoteChar('\'').withCommentStart('!').withDelimiter(';');
-        final Lexer parser = getLexer(code, format);
+        final CSVLexer parser = getLexer(code, format);
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
        assertThat(parser.nextToken(new Token()), matches(EORECORD, "b and ' more\n"));
    }
@ -289,7 +289,7 @@ public class CSVLexerTest {
    @Test
    public void testDelimiterIsWhitespace() throws IOException {
        final String code = "one\ttwo\t\tfour \t five\t six";
-        final Lexer parser = getLexer(code, CSVFormat.TDF);
+        final CSVLexer parser = getLexer(code, CSVFormat.TDF);
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "one"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, "two"));
        assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
@ -300,96 +300,96 @@ public class CSVLexerTest {

    @Test
    public void testEscapedCR() throws Exception {
-        final Lexer lexer = getLexer("character\\" + CR + "Escaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character\\" + CR + "Escaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
    }

    @Test
    public void testCR() throws Exception {
-        final Lexer lexer = getLexer("character" + CR + "NotEscaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character" + CR + "NotEscaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character"));
        assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
    }

    @Test
    public void testEscapedLF() throws Exception {
-        final Lexer lexer = getLexer("character\\" + LF + "Escaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character\\" + LF + "Escaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped"));
    }

    @Test
    public void testLF() throws Exception {
-        final Lexer lexer = getLexer("character" + LF + "NotEscaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character" + LF + "NotEscaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character"));
        assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
    }

    @Test // TODO is this correct? Do we expect <esc>TAB to be unescaped?
    public void testEscapedTab() throws Exception {
-        final Lexer lexer = getLexer("character\\" + TAB + "Escaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character\\" + TAB + "Escaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped"));
    }

    @Test
    public void testTab() throws Exception {
-        final Lexer lexer = getLexer("character" + TAB + "NotEscaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character" + TAB + "NotEscaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped"));
    }

    @Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped?
    public void testEscapedBackspace() throws Exception {
-        final Lexer lexer = getLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped"));
    }

    @Test
    public void testBackspace() throws Exception {
-        final Lexer lexer = getLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped"));
    }

    @Test // TODO is this correct? Do we expect <esc>FF to be unescaped?
    public void testEscapedFF() throws Exception {
-        final Lexer lexer = getLexer("character\\" + FF + "Escaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character\\" + FF + "Escaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped"));
    }

    @Test
    public void testFF() throws Exception {
-        final Lexer lexer = getLexer("character" + FF + "NotEscaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character" + FF + "NotEscaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped"));
    }

    @Test
    public void testEscapedMySqlNullValue() throws Exception {
        // MySQL uses \N to symbolize null values. We have to restore this
-        final Lexer lexer = getLexer("character\\NEscaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character\\NEscaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped"));
    }

    @Test
    public void testEscapedCharacter() throws Exception {
-        final Lexer lexer = getLexer("character\\aEscaped", formatWithEscaping);
+        final CSVLexer lexer = getLexer("character\\aEscaped", formatWithEscaping);
        assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped"));
    }

    @Test
    public void testEscapedControlCharacter() throws Exception {
        // we are explicitly using an escape different from \ here
-        final Lexer lexer = getLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'));
+        final CSVLexer lexer = getLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'));
        assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
    }

    @Test
    public void testEscapedControlCharacter2() throws Exception {
-        final Lexer lexer = getLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'));
+        final CSVLexer lexer = getLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'));
        assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
    }

    @Test(expected = IOException.class)
    public void testEscapingAtEOF() throws Exception {
        final String code = "escaping at EOF is evil\\";
-        final Lexer lexer = getLexer(code, formatWithEscaping);
+        final CSVLexer lexer = getLexer(code, formatWithEscaping);

        lexer.nextToken(new Token());
    }
--- a/src/test/java/org/apache/commons/csv/PerformanceTest.java
+++ b/src/test/java/org/apache/commons/csv/PerformanceTest.java
@ -224,9 +224,9 @@ public class PerformanceTest {
   }


-   private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
+   private static Constructor<CSVLexer> getLexerCtor(final String clazz) throws Exception {
       @SuppressWarnings("unchecked")
-       final Class<Lexer> lexer = (Class<Lexer>) Class.forName("org.apache.commons.csv." + clazz);
+       final Class<CSVLexer> lexer = (Class<CSVLexer>) Class.forName("org.apache.commons.csv." + clazz);
       return lexer.getConstructor(new Class<?>[]{CSVFormat.class, ExtendedBufferedReader.class});
   }

@ -235,7 +235,7 @@ public class PerformanceTest {
       String dynamic = "";
       for (int i = 0; i < max; i++) {
           final ExtendedBufferedReader input = new ExtendedBufferedReader(getReader());
-           Lexer lexer = null;
+           CSVLexer lexer = null;
           if (test.startsWith("CSVLexer")) {
               dynamic="!";
               lexer = getLexerCtor(test).newInstance(new Object[]{format, input});