No longer wanted
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1511005 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 8d8bbf459e
commit 7755640784
@@ -1,247 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.csv;

import static org.apache.commons.csv.Constants.UNDEFINED;
import static org.apache.commons.csv.Token.Type.EOF;
import static org.apache.commons.csv.Token.Type.EORECORD;
import static org.apache.commons.csv.Token.Type.TOKEN;

import java.io.IOException;

/**
 *
 *
 * @version $Id$
 */
class CSVLexer1 extends Lexer {

    private final StringBuilder wsBuf = new StringBuilder();

    // ctor needs to be public so can be called dynamically by PerformanceTest class
    public CSVLexer1(final CSVFormat format, final ExtendedBufferedReader in) {
        super(format, in);
    }

    /**
     * Returns the next token.
     * <p/>
     * A token corresponds to a term, a record change or an end-of-file indicator.
     *
     * @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token.
     * @return the next token found
     * @throws java.io.IOException on stream access error
     */
    @Override
    Token nextToken(Token tkn) throws IOException {
        wsBuf.setLength(0); // reuse

        // get the last read char (required for empty line detection)
        int lastChar = in.getLastChar();

        // read the next char and set eol
        /* note: unfortunately isEndOfLine may consumes a character silently.
         * this has no effect outside of the method. so a simple workaround
         * is to call 'readAgain' on the stream...
         */
        int c = in.read();
        boolean eol = readEndOfLine(c);
        c = in.getLastChar();

        // empty line detection: eol AND (last char was EOL or beginning)
        if (format.getIgnoreEmptyLines()) {
            while (eol
                    && (lastChar == '\n' || lastChar == '\r' || lastChar == UNDEFINED)
                    && !isEndOfFile(lastChar)) {
                // go on char ahead ...
                lastChar = c;
                c = in.read();
                eol = readEndOfLine(c);
                c = in.getLastChar();
                // reached end of file without any content (empty line at the end)
                if (isEndOfFile(c)) {
                    tkn.type = EOF;
                    return tkn;
                }
            }
        }

        // did we reach eof during the last iteration already ? EOF
        if (isEndOfFile(lastChar) || (lastChar != format.getDelimiter() && isEndOfFile(c))) {
            tkn.type = EOF;
            return tkn;
        }

        // important: make sure a new char gets consumed in each iteration
        while (!tkn.isReady && tkn.type != EOF) {
            // ignore whitespaces at beginning of a token
            if (format.getIgnoreSurroundingSpaces()) {
                while (isWhitespace(c) && !eol) {
                    wsBuf.append((char) c);
                    c = in.read();
                    eol = readEndOfLine(c);
                }
            }

            // ok, start of token reached: comment, encapsulated, or token
            if (c == format.getCommentStart()) {
                // ignore everything till end of line and continue (incr linecount)
                in.readLine();
                tkn.reset();
                tkn = nextToken(tkn);
            } else if (c == format.getDelimiter()) {
                // empty token return TOKEN("")
                tkn.type = TOKEN;
                tkn.isReady = true;
            } else if (eol) {
                // empty token return EORECORD("")
                //noop: tkn.content.append("");
                tkn.type = EORECORD;
                tkn.isReady = true;
            } else if (c == format.getQuoteChar()) {
                // consume encapsulated token
                encapsulatedTokenLexer(tkn, c);
            } else if (isEndOfFile(c)) {
                // end of file return EOF()
                //noop: tkn.content.append("");
                tkn.type = EOF;
                tkn.isReady = true;
            } else {
                // next token must be a simple token
                // add removed blanks when not ignoring whitespace chars...
                if (!format.getIgnoreSurroundingSpaces()) {
                    tkn.content.append(wsBuf);
                }
                simpleTokenLexer(tkn, c);
            }
        }
        return tkn;
    }

    /**
     * A simple token lexer
     * <p/>
     * Simple token are tokens which are not surrounded by encapsulators.
     * A simple token might contain escaped delimiters (as \, or \;). The
     * token is finished when one of the following conditions become true:
     * <ul>
     * <li>end of line has been reached (EORECORD)</li>
     * <li>end of stream has been reached (EOF)</li>
     * <li>an unescaped delimiter has been reached (TOKEN)</li>
     * </ul>
     *
     * @param tkn the current token
     * @param c the current character
     * @return the filled token
     * @throws IOException on stream access error
     */
    private Token simpleTokenLexer(final Token tkn, int c) throws IOException {
        while (true) {
            if (readEndOfLine(c)) {
                // end of record
                tkn.type = EORECORD;
                tkn.isReady = true;
                break;
            } else if (isEndOfFile(c)) {
                // end of file
                tkn.type = EOF;
                tkn.isReady = true;
                break;
            } else if (c == format.getDelimiter()) {
                // end of token
                tkn.type = TOKEN;
                tkn.isReady = true;
                break;
            } else if (c == format.getEscape()) {
                tkn.content.append((char) readEscape());
            } else {
                tkn.content.append((char) c);
            }

            c = in.read();
        }

        if (format.getIgnoreSurroundingSpaces()) {
            trimTrailingSpaces(tkn.content);
        }

        return tkn;
    }

    /**
     * An encapsulated token lexer
     * <p/>
     * Encapsulated tokens are surrounded by the given encapsulating-string.
     * The encapsulator itself might be included in the token using a
     * doubling syntax (as "", '') or using escaping (as in \", \').
     * Whitespaces before and after an encapsulated token are ignored.
     *
     * @param tkn the current token
     * @param c the current character
     * @return a valid token object
     * @throws IOException on invalid state
     */
    private Token encapsulatedTokenLexer(final Token tkn, int c) throws IOException {
        // save current line
        final long startLineNumber = getCurrentLineNumber();
        // ignore the given delimiter
        // assert c == delimiter;
        while (true) {
            c = in.read();

            if (c == format.getEscape()) {
                tkn.content.append((char) readEscape());
            } else if (c == format.getQuoteChar()) {
                if (in.lookAhead() == format.getQuoteChar()) {
                    // double or escaped encapsulator -> add single encapsulator to token
                    c = in.read();
                    tkn.content.append((char) c);
                } else {
                    // token finish mark (encapsulator) reached: ignore whitespace till delimiter
                    while (true) {
                        c = in.read();
                        if (c == format.getDelimiter()) {
                            tkn.type = TOKEN;
                            tkn.isReady = true;
                            return tkn;
                        } else if (isEndOfFile(c)) {
                            tkn.type = EOF;
                            tkn.isReady = true;
                            return tkn;
                        } else if (readEndOfLine(c)) {
                            // ok eo token reached
                            tkn.type = EORECORD;
                            tkn.isReady = true;
                            return tkn;
                        } else if (!isWhitespace(c)) {
                            // error invalid char between token and next delimiter
                            throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
                        }
                    }
                }
            } else if (isEndOfFile(c)) {
                // error condition (end of file before end of token)
                throw new IOException("(startline " + startLineNumber + ") EOF reached before encapsulated token finished");
            } else {
                // consume character
                tkn.content.append((char) c);
            }
        }
    }

}
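All four deleted lexers share the same package-private contract inherited from Lexer: the caller repeatedly passes a reused Token to nextToken(Token) until a token of type EOF comes back. Below is a minimal, hypothetical driver loop illustrating that contract. It is a sketch, not code from this commit: it assumes package-private access (so it would have to live in org.apache.commons.csv), that Token has a no-argument constructor, and that ExtendedBufferedReader wraps a plain java.io.Reader.

// Hypothetical driver sketch for the deleted lexers; not part of this commit.
package org.apache.commons.csv;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

final class LexerDriverSketch {

    // Tokenizes a CSV string with CSVLexer1 and returns "TYPE:content" entries.
    static List<String> tokenize(final String csv) throws IOException {
        // Assumption: ExtendedBufferedReader can wrap any java.io.Reader.
        final Lexer lexer = new CSVLexer1(CSVFormat.DEFAULT,
                new ExtendedBufferedReader(new StringReader(csv)));
        final List<String> tokens = new ArrayList<String>();
        final Token tkn = new Token(); // reused across calls, as the Javadoc above requires
        Token result;
        do {
            tkn.reset();               // the caller is responsible for (re)initializing the Token
            result = lexer.nextToken(tkn);
            tokens.add(result.type + ":" + result.content);
        } while (result.type != Token.Type.EOF); // EOF terminates the token stream
        return tokens;
    }
}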
@@ -1,235 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.csv;

import static org.apache.commons.csv.Token.Type.COMMENT;
import static org.apache.commons.csv.Token.Type.EOF;
import static org.apache.commons.csv.Token.Type.EORECORD;
import static org.apache.commons.csv.Token.Type.INVALID;
import static org.apache.commons.csv.Token.Type.TOKEN;

import java.io.IOException;

/**
 *
 *
 * @version $Id$
 */
class CSVLexer1306663 extends Lexer {

    // ctor needs to be public so can be called dynamically by PerformanceTest class
    public CSVLexer1306663(final CSVFormat format, final ExtendedBufferedReader in) {
        super(format, in);
    }

    /**
     * Returns the next token.
     * <p/>
     * A token corresponds to a term, a record change or an end-of-file indicator.
     *
     * @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token.
     * @return the next token found
     * @throws java.io.IOException on stream access error
     */
    @Override
    Token nextToken(final Token tkn) throws IOException {

        // get the last read char (required for empty line detection)
        int lastChar = in.getLastChar();

        // read the next char and set eol
        int c = in.read();

        if (isStartOfLine(lastChar) && isCommentStart(c)) {
            in.readLine();
            tkn.type = COMMENT;
            return tkn;
        }

        /* note: unfortunately isEndOfLine may consumes a character silently.
         * this has no effect outside of the method. so a simple workaround
         * is to call 'readAgain' on the stream...
         */
        boolean eol = readEndOfLine(c);
        c = in.getLastChar();

        // empty line detection: eol AND (last char was EOL or beginning)
        if (ignoreEmptyLines) {
            while (eol && isStartOfLine(lastChar)) {
                // go on char ahead ...
                lastChar = c;
                c = in.read();
                eol = readEndOfLine(c);
                c = in.getLastChar();
                // reached end of file without any content (empty line at the end)
                if (isEndOfFile(c)) {
                    tkn.type = EOF;
                    // don't set tkn.isReady here because no content
                    return tkn;
                }
            }
        }

        // did we reach eof during the last iteration already ? EOF
        if (isEndOfFile(lastChar) || (!isDelimiter(lastChar) && isEndOfFile(c))) {
            tkn.type = EOF;
            // don't set tkn.isReady here because no content
            return tkn;
        }

        // important: make sure a new char gets consumed in each iteration
        while (tkn.type == INVALID) {
            // ignore whitespaces at beginning of a token
            if (ignoreSurroundingSpaces) {
                while (isWhitespace(c) && !eol) {
                    c = in.read();
                    eol = readEndOfLine(c);
                }
            }

            // ok, start of token reached: encapsulated, or token
            if (isDelimiter(c)) {
                // empty token return TOKEN("")
                tkn.type = TOKEN;
            } else if (eol) {
                // empty token return EORECORD("")
                //noop: tkn.content.append("");
                tkn.type = EORECORD;
            } else if (isQuoteChar(c)) {
                // consume encapsulated token
                encapsulatedTokenLexer(tkn);
            } else if (isEndOfFile(c)) {
                // end of file return EOF()
                //noop: tkn.content.append("");
                tkn.type = EOF;
                tkn.isReady = true; // there is data at EOF
            } else {
                // next token must be a simple token
                // add removed blanks when not ignoring whitespace chars...
                simpleTokenLexer(tkn, c);
            }
        }
        return tkn;
    }

    /**
     * A simple token lexer
     * <p/>
     * Simple token are tokens which are not surrounded by encapsulators.
     * A simple token might contain escaped delimiters (as \, or \;). The
     * token is finished when one of the following conditions become true:
     * <ul>
     * <li>end of line has been reached (EORECORD)</li>
     * <li>end of stream has been reached (EOF)</li>
     * <li>an unescaped delimiter has been reached (TOKEN)</li>
     * </ul>
     *
     * @param tkn the current token
     * @param c the current character
     * @return the filled token
     * @throws IOException on stream access error
     */
    private Token simpleTokenLexer(final Token tkn, int c) throws IOException {
        // Faster to use while(true)+break than while(tkn.type == INVALID)
        while (true) {
            if (readEndOfLine(c)) {
                tkn.type = EORECORD;
                break;
            } else if (isEndOfFile(c)) {
                tkn.type = EOF;
                tkn.isReady = true; // There is data at EOF
                break;
            } else if (isDelimiter(c)) {
                tkn.type = TOKEN;
                break;
            } else if (isEscape(c)) {
                tkn.content.append((char) readEscape());
                c = in.read(); // continue
            } else {
                tkn.content.append((char) c);
                c = in.read(); // continue
            }
        }

        if (ignoreSurroundingSpaces) {
            trimTrailingSpaces(tkn.content);
        }

        return tkn;
    }

    /**
     * An encapsulated token lexer
     * <p/>
     * Encapsulated tokens are surrounded by the given encapsulating-string.
     * The encapsulator itself might be included in the token using a
     * doubling syntax (as "", '') or using escaping (as in \", \').
     * Whitespaces before and after an encapsulated token are ignored.
     *
     * @param tkn the current token
     * @return a valid token object
     * @throws IOException on invalid state
     */
    private Token encapsulatedTokenLexer(final Token tkn) throws IOException {
        // save current line
        final long startLineNumber = getCurrentLineNumber();
        // ignore the given delimiter
        // assert c == delimiter;
        int c;
        while (true) {
            c = in.read();

            if (isEscape(c)) {
                tkn.content.append((char) readEscape());
            } else if (isQuoteChar(c)) {
                if (isQuoteChar(in.lookAhead())) {
                    // double or escaped encapsulator -> add single encapsulator to token
                    c = in.read();
                    tkn.content.append((char) c);
                } else {
                    // token finish mark (encapsulator) reached: ignore whitespace till delimiter
                    while (true) {
                        c = in.read();
                        if (isDelimiter(c)) {
                            tkn.type = TOKEN;
                            return tkn;
                        } else if (isEndOfFile(c)) {
                            tkn.type = EOF;
                            tkn.isReady = true; // There is data at EOF
                            return tkn;
                        } else if (readEndOfLine(c)) {
                            // ok eo token reached
                            tkn.type = EORECORD;
                            return tkn;
                        } else if (!isWhitespace(c)) {
                            // error invalid char between token and next delimiter
                            throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
                        }
                    }
                }
            } else if (isEndOfFile(c)) {
                // error condition (end of file before end of token)
                throw new IOException("(startline " + startLineNumber + ") EOF reached before encapsulated token finished");
            } else {
                // consume character
                tkn.content.append((char) c);
            }
        }
    }

}
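The encapsulatedTokenLexer Javadoc above describes the doubling syntax: a "" inside a quoted field yields a single quote character in the token. A small, hedged example of that behaviour through the public Commons CSV API (CSVParser.parse and CSVFormat.DEFAULT as found in released 1.x versions, not the package-private lexer deleted here):

import java.io.IOException;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class DoubledQuoteExample {
    public static void main(final String[] args) throws IOException {
        // "a""b",c uses the doubling syntax: "" inside a quoted field becomes one "
        try (CSVParser parser = CSVParser.parse("\"a\"\"b\",c", CSVFormat.DEFAULT)) {
            for (final CSVRecord record : parser) {
                System.out.println(record.get(0)); // prints: a"b
                System.out.println(record.get(1)); // prints: c
            }
        }
    }
}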
@@ -1,235 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.csv;

import static org.apache.commons.csv.Token.Type.COMMENT;
import static org.apache.commons.csv.Token.Type.EOF;
import static org.apache.commons.csv.Token.Type.EORECORD;
import static org.apache.commons.csv.Token.Type.INVALID;
import static org.apache.commons.csv.Token.Type.TOKEN;

import java.io.IOException;

/**
 *
 *
 * @version $Id$
 */
class CSVLexer1306667 extends Lexer {

    // ctor needs to be public so can be called dynamically by PerformanceTest class
    public CSVLexer1306667(final CSVFormat format, final ExtendedBufferedReader in) {
        super(format, in);
    }

    /**
     * Returns the next token.
     * <p/>
     * A token corresponds to a term, a record change or an end-of-file indicator.
     *
     * @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token.
     * @return the next token found
     * @throws java.io.IOException on stream access error
     */
    @Override
    Token nextToken(final Token tkn) throws IOException {

        // get the last read char (required for empty line detection)
        int lastChar = in.getLastChar();

        // read the next char and set eol
        int c = in.read();

        /* note: unfortunately isEndOfLine may consumes a character silently.
         * this has no effect outside of the method. so a simple workaround
         * is to call 'readAgain' on the stream...
         */
        boolean eol = readEndOfLine(c);
        c = in.getLastChar();

        // empty line detection: eol AND (last char was EOL or beginning)
        if (ignoreEmptyLines) {
            while (eol && isStartOfLine(lastChar)) {
                // go on char ahead ...
                lastChar = c;
                c = in.read();
                eol = readEndOfLine(c);
                c = in.getLastChar();
                // reached end of file without any content (empty line at the end)
                if (isEndOfFile(c)) {
                    tkn.type = EOF;
                    // don't set tkn.isReady here because no content
                    return tkn;
                }
            }
        }

        // did we reach eof during the last iteration already ? EOF
        if (isEndOfFile(lastChar) || (!isDelimiter(lastChar) && isEndOfFile(c))) {
            tkn.type = EOF;
            // don't set tkn.isReady here because no content
            return tkn;
        }

        if (isStartOfLine(lastChar) && isCommentStart(c)) {
            in.readLine();
            tkn.type = COMMENT;
            return tkn;
        }

        // important: make sure a new char gets consumed in each iteration
        while (tkn.type == INVALID) {
            // ignore whitespaces at beginning of a token
            if (ignoreSurroundingSpaces) {
                while (isWhitespace(c) && !eol) {
                    c = in.read();
                    eol = readEndOfLine(c);
                }
            }

            // ok, start of token reached: encapsulated, or token
            if (isDelimiter(c)) {
                // empty token return TOKEN("")
                tkn.type = TOKEN;
            } else if (eol) {
                // empty token return EORECORD("")
                //noop: tkn.content.append("");
                tkn.type = EORECORD;
            } else if (isQuoteChar(c)) {
                // consume encapsulated token
                encapsulatedTokenLexer(tkn);
            } else if (isEndOfFile(c)) {
                // end of file return EOF()
                //noop: tkn.content.append("");
                tkn.type = EOF;
                tkn.isReady = true; // there is data at EOF
            } else {
                // next token must be a simple token
                // add removed blanks when not ignoring whitespace chars...
                simpleTokenLexer(tkn, c);
            }
        }
        return tkn;
    }

    /**
     * A simple token lexer
     * <p/>
     * Simple token are tokens which are not surrounded by encapsulators.
     * A simple token might contain escaped delimiters (as \, or \;). The
     * token is finished when one of the following conditions become true:
     * <ul>
     * <li>end of line has been reached (EORECORD)</li>
     * <li>end of stream has been reached (EOF)</li>
     * <li>an unescaped delimiter has been reached (TOKEN)</li>
     * </ul>
     *
     * @param tkn the current token
     * @param c the current character
     * @return the filled token
     * @throws IOException on stream access error
     */
    private Token simpleTokenLexer(final Token tkn, int c) throws IOException {
        // Faster to use while(true)+break than while(tkn.type == INVALID)
        while (true) {
            if (readEndOfLine(c)) {
                tkn.type = EORECORD;
                break;
            } else if (isEndOfFile(c)) {
                tkn.type = EOF;
                tkn.isReady = true; // There is data at EOF
                break;
            } else if (isDelimiter(c)) {
                tkn.type = TOKEN;
                break;
            } else if (isEscape(c)) {
                tkn.content.append((char) readEscape());
                c = in.read(); // continue
            } else {
                tkn.content.append((char) c);
                c = in.read(); // continue
            }
        }

        if (ignoreSurroundingSpaces) {
            trimTrailingSpaces(tkn.content);
        }

        return tkn;
    }

    /**
     * An encapsulated token lexer
     * <p/>
     * Encapsulated tokens are surrounded by the given encapsulating-string.
     * The encapsulator itself might be included in the token using a
     * doubling syntax (as "", '') or using escaping (as in \", \').
     * Whitespaces before and after an encapsulated token are ignored.
     *
     * @param tkn the current token
     * @return a valid token object
     * @throws IOException on invalid state
     */
    private Token encapsulatedTokenLexer(final Token tkn) throws IOException {
        // save current line
        final long startLineNumber = getCurrentLineNumber();
        // ignore the given delimiter
        // assert c == delimiter;
        int c;
        while (true) {
            c = in.read();

            if (isEscape(c)) {
                tkn.content.append((char) readEscape());
            } else if (isQuoteChar(c)) {
                if (isQuoteChar(in.lookAhead())) {
                    // double or escaped encapsulator -> add single encapsulator to token
                    c = in.read();
                    tkn.content.append((char) c);
                } else {
                    // token finish mark (encapsulator) reached: ignore whitespace till delimiter
                    while (true) {
                        c = in.read();
                        if (isDelimiter(c)) {
                            tkn.type = TOKEN;
                            return tkn;
                        } else if (isEndOfFile(c)) {
                            tkn.type = EOF;
                            tkn.isReady = true; // There is data at EOF
                            return tkn;
                        } else if (readEndOfLine(c)) {
                            // ok eo token reached
                            tkn.type = EORECORD;
                            return tkn;
                        } else if (!isWhitespace(c)) {
                            // error invalid char between token and next delimiter
                            throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
                        }
                    }
                }
            } else if (isEndOfFile(c)) {
                // error condition (end of file before end of token)
                throw new IOException("(startline " + startLineNumber + ") EOF reached before encapsulated token finished");
            } else {
                // consume character
                tkn.content.append((char) c);
            }
        }
    }

}
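Like the other deleted lexers, CSVLexer1306667 skips leading whitespace and calls trimTrailingSpaces only when ignoreSurroundingSpaces is set. A hedged sketch of how that option looks from the public API, assuming the CSVFormat.withIgnoreSurroundingSpaces(boolean) method available in released 1.x versions:

import java.io.IOException;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class SurroundingSpacesExample {
    public static void main(final String[] args) throws IOException {
        // With ignoreSurroundingSpaces enabled, leading blanks are skipped and trailing
        // blanks are trimmed, which is what the lexer's trimTrailingSpaces call does internally.
        final CSVFormat format = CSVFormat.DEFAULT.withIgnoreSurroundingSpaces(true);
        try (CSVParser parser = CSVParser.parse("  a  , b ", format)) {
            for (final CSVRecord record : parser) {
                System.out.println("[" + record.get(0) + "][" + record.get(1) + "]"); // prints: [a][b]
            }
        }
    }
}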
@@ -1,254 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.csv;

import static org.apache.commons.csv.Constants.END_OF_STREAM;
import static org.apache.commons.csv.Token.Type.COMMENT;
import static org.apache.commons.csv.Token.Type.EOF;
import static org.apache.commons.csv.Token.Type.EORECORD;
import static org.apache.commons.csv.Token.Type.INVALID;
import static org.apache.commons.csv.Token.Type.TOKEN;

import java.io.IOException;

/**
 * Experimental Lexer using enums to keep track of state and character type.
 * Unfortunately it is twice as slow.
 * For reference purpose only.
 *
 * @version $Id$
 */
class CSVLexer3 extends Lexer {

    private final char escape;

    // ctor needs to be public so can be called dynamically by PerformanceTest class
    public CSVLexer3(final CSVFormat format, final ExtendedBufferedReader in) {
        super(format, in);
        this.escape = format.getEscape();
    }

    /**
     * Classify the character types
     */
    private static enum CharType {
        DELIM,
        ESCAPE,
        ENCAP,
        EOL,
        COMMENT_START,
        WHITESPACE,
        OTHER,
        EOFCHAR
    }

    private CharType classify(final int intch) {
        if (isDelimiter(intch)) {
            return CharType.DELIM;
        }
        if (isCommentStart(intch)) {
            return CharType.COMMENT_START;
        }
        if (isQuoteChar(intch)) {
            return CharType.ENCAP;
        }
        if (isEscape(intch)) {
            return CharType.ESCAPE;
        }
        if (intch == '\r' || intch == '\n') {
            return CharType.EOL;
        }
        if (isWhitespace(intch)) { // Must be after EOL check
            return CharType.WHITESPACE;
        }
        if (intch == END_OF_STREAM) {
            return CharType.EOFCHAR;
        }
        return CharType.OTHER;
    }

    /**
     * Parsing states
     */
    private static enum State {
        BEGIN, PLAIN, INQUOTE, QUOTEQUOTE, ESCAPE_PLAIN, ESCAPE_QUOTE,
    }

    /**
     * Returns the next token.
     * <p/>
     * A token corresponds to a term, a record change or an end-of-file indicator.
     *
     * @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token.
     * @return the next token found
     * @throws java.io.IOException on stream access error
     */
    @Override
    Token nextToken(final Token tkn) throws IOException {

        State state = State.BEGIN;
        int intch;
        boolean trimTrailingSpaces = false;
        while (tkn.type == INVALID) {
            intch = in.read();
            final CharType type = classify(intch);
            switch (state) {
            case BEGIN:
                switch (type) {
                case COMMENT_START:
                    in.readLine();
                    tkn.type = COMMENT;
                    break;
                case ENCAP:
                    state = State.INQUOTE;
                    break;
                case DELIM:
                    tkn.type = TOKEN;
                    break;
                case EOL:
                    tkn.type = EORECORD;
                    break;
                case EOFCHAR:
                    tkn.type = EOF;
                    break;
                case ESCAPE:
                    state = State.ESCAPE_PLAIN;
                    break;
                case OTHER:
                    tkn.content.append((char) intch);
                    state = State.PLAIN;
                    break;
                case WHITESPACE:
                    if (!ignoreSurroundingSpaces) {
                        tkn.content.append((char) intch);
                        state = State.PLAIN;
                    }
                    break;
                }
                break;
            case PLAIN:
                switch (type) {
                case DELIM:
                    tkn.type = TOKEN;
                    break;
                case EOL:
                    tkn.type = EORECORD;
                    break;
                case EOFCHAR:
                    tkn.type = EOF;
                    break;
                case ESCAPE:
                    state = State.ESCAPE_PLAIN;
                    break;
                default:
                    trimTrailingSpaces = ignoreSurroundingSpaces; // we have a plain token
                    tkn.content.append((char) intch);
                    break;
                }
                break;
            case INQUOTE: // Started a quoted string
                switch (type) {
                case ENCAP:
                    state = State.QUOTEQUOTE;
                    break;
                case ESCAPE:
                    state = State.ESCAPE_QUOTE;
                    break;
                case EOFCHAR:
                    throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in quoted string");
                default:
                    tkn.content.append((char) intch);
                    break;
                }
                break;
            case QUOTEQUOTE: // "..." seen, expecting end of token or "
                switch (type) {
                case DELIM:
                    tkn.type = TOKEN;
                    break;
                case EOL:
                    tkn.type = EORECORD;
                    break;
                case EOFCHAR:
                    tkn.type = EOF;
                    break;
                case ENCAP: // "..."" seen, append it
                    tkn.content.append((char) intch);
                    state = State.INQUOTE;
                    break;
                case WHITESPACE: // trailing whitespace may be allowed
                    if (!ignoreSurroundingSpaces) {
                        // error invalid char between token and next delimiter
                        throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
                    }
                    break;
                // Everything else is invalid
                case ESCAPE:
                case OTHER:
                case COMMENT_START:
                    // error invalid char between token and next delimiter
                    throw new IOException("(line " + getCurrentLineNumber() + ") invalid char between encapsulated token and delimiter");
                }
                break;
            case ESCAPE_PLAIN:
                switch (type) {
                case DELIM:
                case ESCAPE:
                case EOL:
                    tkn.content.append((char) intch);
                    state = State.PLAIN;
                    break;
                case COMMENT_START: // TODO should comment be escaped?
                case ENCAP: // TODO is this correct?
                case OTHER: // TODO may need to escape further
                case WHITESPACE:
                    tkn.content.append(escape);
                    tkn.content.append((char) intch);
                    break;
                case EOFCHAR:
                    throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in escape sequence");
                }
                break;
            case ESCAPE_QUOTE:
                switch (type) {
                case ESCAPE:
                case ENCAP: // this is the only required escape
                    tkn.content.append((char) intch);
                    break;
                case COMMENT_START:
                case DELIM:
                case EOL:
                case OTHER:
                case WHITESPACE:
                    tkn.content.append(escape);
                    tkn.content.append((char) intch);
                    break;
                case EOFCHAR:
                    throw new IOException("(line " + getCurrentLineNumber() + ") unexpected EOF in escape sequence");
                }
                break;
            default:
                break;
            }
        }
        if (trimTrailingSpaces) {
            trimTrailingSpaces(tkn.content);
        }
        return tkn;
    }
}
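CSVLexer3's class Javadoc explains the experiment it documents: classify every character into a CharType, then dispatch on an explicit State enum, which turned out to be roughly twice as slow as the hand-rolled if/else lexers. A stripped-down, self-contained illustration of that classify-then-dispatch pattern (hypothetical class and method names, not part of Commons CSV):

import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch of the enum state-machine pattern used by CSVLexer3:
// each character is mapped to a CharType, then a switch on the current State decides the transition.
public class EnumStateMachineSketch {

    private enum CharType { DELIM, ENCAP, OTHER, END }
    private enum State { BEGIN, PLAIN, INQUOTE, QUOTEQUOTE }

    private static CharType classify(final int ch) {
        if (ch == -1)  { return CharType.END; }
        if (ch == ',') { return CharType.DELIM; }
        if (ch == '"') { return CharType.ENCAP; }
        return CharType.OTHER;
    }

    /** Splits a single CSV line into fields, honouring the "" doubling syntax. */
    static List<String> split(final String line) {
        final List<String> fields = new ArrayList<String>();
        final StringBuilder field = new StringBuilder();
        State state = State.BEGIN;
        for (int i = 0; i <= line.length(); i++) {
            final int ch = i < line.length() ? line.charAt(i) : -1; // -1 marks end of input
            final CharType type = classify(ch);
            switch (state) {
            case BEGIN:
            case PLAIN:
                if (type == CharType.DELIM || type == CharType.END) {
                    fields.add(field.toString());    // delimiter or end closes the field
                    field.setLength(0);
                    state = State.BEGIN;
                } else if (type == CharType.ENCAP && state == State.BEGIN) {
                    state = State.INQUOTE;           // opening quote starts a quoted field
                } else {
                    field.append((char) ch);
                    state = State.PLAIN;
                }
                break;
            case INQUOTE:
                if (type == CharType.ENCAP) {
                    state = State.QUOTEQUOTE;        // maybe end of field, maybe an escaped quote
                } else {
                    field.append((char) ch);
                }
                break;
            case QUOTEQUOTE:
                if (type == CharType.ENCAP) {
                    field.append('"');               // "" inside quotes -> literal quote
                    state = State.INQUOTE;
                } else {                             // delimiter or end closes the field
                    fields.add(field.toString());
                    field.setLength(0);
                    state = State.BEGIN;
                }
                break;
            }
        }
        return fields;
    }

    public static void main(final String[] args) {
        System.out.println(split("a,\"b\"\"c\",d")); // prints: [a, b"c, d]
    }
}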