Turned the token types into an Enum
git-svn-id: https://svn.apache.org/repos/asf/commons/sandbox/csv/trunk@1199872 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
cbcfb72912
commit
16bfec07ff
|
@ -22,6 +22,7 @@ import java.io.Reader;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.apache.commons.csv.CSVParser.Token.Type.*;
|
||||
|
||||
/**
|
||||
* Parses CSV files according to the specified configuration.
|
||||
|
@ -54,19 +55,6 @@ public class CSVParser {
|
|||
/** length of the initial token (content-)buffer */
|
||||
private static final int INITIAL_TOKEN_LENGTH = 50;
|
||||
|
||||
// the token types
|
||||
/** Token has no valid content, i.e. is in its initialized state. */
|
||||
static final int TT_INVALID = -1;
|
||||
|
||||
/** Token with content, at beginning or in the middle of a line. */
|
||||
static final int TT_TOKEN = 0;
|
||||
|
||||
/** Token (which can have content) when end of file is reached. */
|
||||
static final int TT_EOF = 1;
|
||||
|
||||
/** Token with content when end of a line is reached. */
|
||||
static final int TT_EORECORD = 2;
|
||||
|
||||
/** Immutable empty String array. */
|
||||
private static final String[] EMPTY_STRING_ARRAY = new String[0];
|
||||
|
||||
|
@ -91,22 +79,33 @@ public class CSVParser {
|
|||
* It is used as contract between the lexer and the parser.
|
||||
*/
|
||||
static class Token {
|
||||
/**
|
||||
* Token type, see TT_xxx constants.
|
||||
*/
|
||||
int type = TT_INVALID;
|
||||
/**
|
||||
* The content buffer.
|
||||
*/
|
||||
|
||||
enum Type {
|
||||
/** Token has no valid content, i.e. is in its initialized state. */
|
||||
INVALID,
|
||||
|
||||
/** Token with content, at beginning or in the middle of a line. */
|
||||
TOKEN,
|
||||
|
||||
/** Token (which can have content) when end of file is reached. */
|
||||
EOF,
|
||||
|
||||
/** Token with content when end of a line is reached. */
|
||||
EORECORD
|
||||
}
|
||||
|
||||
/** Token type */
|
||||
Type type = INVALID;
|
||||
|
||||
/** The content buffer. */
|
||||
CharBuffer content = new CharBuffer(INITIAL_TOKEN_LENGTH);
|
||||
/**
|
||||
* Token ready flag: indicates a valid token with content (ready for the parser).
|
||||
*/
|
||||
|
||||
/** Token ready flag: indicates a valid token with content (ready for the parser). */
|
||||
boolean isReady;
|
||||
|
||||
Token reset() {
|
||||
content.clear();
|
||||
type = TT_INVALID;
|
||||
type = INVALID;
|
||||
isReady = false;
|
||||
return this;
|
||||
}
|
||||
|
@ -180,26 +179,26 @@ public class CSVParser {
|
|||
reusableToken.reset();
|
||||
nextToken(reusableToken);
|
||||
switch (reusableToken.type) {
|
||||
case TT_TOKEN:
|
||||
case TOKEN:
|
||||
record.add(reusableToken.content.toString());
|
||||
break;
|
||||
case TT_EORECORD:
|
||||
case EORECORD:
|
||||
record.add(reusableToken.content.toString());
|
||||
break;
|
||||
case TT_EOF:
|
||||
case EOF:
|
||||
if (reusableToken.isReady) {
|
||||
record.add(reusableToken.content.toString());
|
||||
} else {
|
||||
ret = null;
|
||||
}
|
||||
break;
|
||||
case TT_INVALID:
|
||||
case INVALID:
|
||||
default:
|
||||
// error: throw IOException
|
||||
throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
|
||||
// unreachable: break;
|
||||
}
|
||||
if (reusableToken.type != TT_TOKEN) {
|
||||
if (reusableToken.type != TOKEN) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -272,19 +271,19 @@ public class CSVParser {
|
|||
c = in.readAgain();
|
||||
// reached end of file without any content (empty line at the end)
|
||||
if (isEndOfFile(c)) {
|
||||
tkn.type = TT_EOF;
|
||||
tkn.type = EOF;
|
||||
return tkn;
|
||||
}
|
||||
}
|
||||
|
||||
// did we reach eof during the last iteration already ? TT_EOF
|
||||
// did we reach eof during the last iteration already ? EOF
|
||||
if (isEndOfFile(lastChar) || (lastChar != format.getDelimiter() && isEndOfFile(c))) {
|
||||
tkn.type = TT_EOF;
|
||||
tkn.type = EOF;
|
||||
return tkn;
|
||||
}
|
||||
|
||||
// important: make sure a new char gets consumed in each iteration
|
||||
while (!tkn.isReady && tkn.type != TT_EOF) {
|
||||
while (!tkn.isReady && tkn.type != EOF) {
|
||||
// ignore whitespaces at beginning of a token
|
||||
while (format.isLeadingSpacesIgnored() && isWhitespace(c) && !eol) {
|
||||
wsBuf.append((char) c);
|
||||
|
@ -297,21 +296,21 @@ public class CSVParser {
|
|||
in.readLine();
|
||||
tkn = nextToken(tkn.reset());
|
||||
} else if (c == format.getDelimiter()) {
|
||||
// empty token return TT_TOKEN("")
|
||||
tkn.type = TT_TOKEN;
|
||||
// empty token return TOKEN("")
|
||||
tkn.type = TOKEN;
|
||||
tkn.isReady = true;
|
||||
} else if (eol) {
|
||||
// empty token return TT_EORECORD("")
|
||||
// empty token return EORECORD("")
|
||||
//noop: tkn.content.append("");
|
||||
tkn.type = TT_EORECORD;
|
||||
tkn.type = EORECORD;
|
||||
tkn.isReady = true;
|
||||
} else if (c == format.getEncapsulator()) {
|
||||
// consume encapsulated token
|
||||
encapsulatedTokenLexer(tkn, c);
|
||||
} else if (isEndOfFile(c)) {
|
||||
// end of file return TT_EOF()
|
||||
// end of file return EOF()
|
||||
//noop: tkn.content.append("");
|
||||
tkn.type = TT_EOF;
|
||||
tkn.type = EOF;
|
||||
tkn.isReady = true;
|
||||
} else {
|
||||
// next token must be a simple token
|
||||
|
@ -332,9 +331,9 @@ public class CSVParser {
|
|||
* A simple token might contain escaped delimiters (as \, or \;). The
|
||||
* token is finished when one of the following conditions becomes true:
|
||||
* <ul>
|
||||
* <li>end of line has been reached (TT_EORECORD)</li>
|
||||
* <li>end of stream has been reached (TT_EOF)</li>
|
||||
* <li>an unescaped delimiter has been reached (TT_TOKEN)</li>
|
||||
* <li>end of line has been reached (EORECORD)</li>
|
||||
* <li>end of stream has been reached (EOF)</li>
|
||||
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param tkn the current token
|
||||
|
@ -346,17 +345,17 @@ public class CSVParser {
|
|||
for (; ;) {
|
||||
if (isEndOfLine(c)) {
|
||||
// end of record
|
||||
tkn.type = TT_EORECORD;
|
||||
tkn.type = EORECORD;
|
||||
tkn.isReady = true;
|
||||
break;
|
||||
} else if (isEndOfFile(c)) {
|
||||
// end of file
|
||||
tkn.type = TT_EOF;
|
||||
tkn.type = EOF;
|
||||
tkn.isReady = true;
|
||||
break;
|
||||
} else if (c == format.getDelimiter()) {
|
||||
// end of token
|
||||
tkn.type = TT_TOKEN;
|
||||
tkn.type = TOKEN;
|
||||
tkn.isReady = true;
|
||||
break;
|
||||
} else if (c == '\\' && format.isUnicodeEscapesInterpreted() && in.lookAhead() == 'u') {
|
||||
|
@ -414,16 +413,16 @@ public class CSVParser {
|
|||
for (; ;) {
|
||||
c = in.read();
|
||||
if (c == format.getDelimiter()) {
|
||||
tkn.type = TT_TOKEN;
|
||||
tkn.type = TOKEN;
|
||||
tkn.isReady = true;
|
||||
return tkn;
|
||||
} else if (isEndOfFile(c)) {
|
||||
tkn.type = TT_EOF;
|
||||
tkn.type = EOF;
|
||||
tkn.isReady = true;
|
||||
return tkn;
|
||||
} else if (isEndOfLine(c)) {
|
||||
// ok eo token reached
|
||||
tkn.type = TT_EORECORD;
|
||||
tkn.type = EORECORD;
|
||||
tkn.isReady = true;
|
||||
return tkn;
|
||||
} else if (!isWhitespace(c)) {
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -23,6 +24,8 @@ import java.util.Arrays;
|
|||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import static org.apache.commons.csv.CSVParser.Token.Type.*;
|
||||
|
||||
/**
|
||||
* CSVParserTest
|
||||
*
|
||||
|
@ -60,7 +63,7 @@ public class CSVParserTest extends TestCase {
|
|||
*/
|
||||
public String testNextToken() throws IOException {
|
||||
Token t = super.nextToken();
|
||||
return Integer.toString(t.type) + ";" + t.content + ";";
|
||||
return t.type.name() + ";" + t.content + ";";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -72,16 +75,16 @@ public class CSVParserTest extends TestCase {
|
|||
public void testNextToken1() throws IOException {
|
||||
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||
assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";lmnop;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";qrst;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";uv;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";wxy;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";z;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";abc;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";def;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";hijk;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";lmnop;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";qrst;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";uv;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";wxy;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";z;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";;", parser.testNextToken());
|
||||
assertEquals(EOF + ";;", parser.testNextToken());
|
||||
}
|
||||
|
||||
// multiline including comments (and empty lines)
|
||||
|
@ -99,19 +102,19 @@ public class CSVParserTest extends TestCase {
|
|||
TestCSVParser parser = new TestCSVParser(new StringReader(code), format);
|
||||
|
||||
|
||||
assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";b x;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EORECORD + ";c;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";d;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";e;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";1;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";2;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";3;", parser.testNextToken());
|
||||
assertEquals(EORECORD + ";;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";b x;", parser.testNextToken());
|
||||
assertEquals(EORECORD + ";c;", parser.testNextToken());
|
||||
assertEquals(EORECORD + ";;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";d;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";e;", parser.testNextToken());
|
||||
assertEquals(EORECORD + ";;", parser.testNextToken());
|
||||
assertEquals(EOF + ";;", parser.testNextToken());
|
||||
assertEquals(EOF + ";;", parser.testNextToken());
|
||||
|
||||
}
|
||||
|
||||
|
@ -124,15 +127,15 @@ public class CSVParserTest extends TestCase {
|
|||
CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#');
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code), format);
|
||||
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";a;", parser.testNextToken());
|
||||
// an unquoted single backslash is not an escape char
|
||||
assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";\\;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";;", parser.testNextToken());
|
||||
assertEquals(EORECORD + ";b;", parser.testNextToken());
|
||||
// an unquoted single backslash is not an escape char
|
||||
assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";\\;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";;", parser.testNextToken());
|
||||
assertEquals(EOF + ";;", parser.testNextToken());
|
||||
}
|
||||
|
||||
// encapsulator tokenizer (single line)
|
||||
|
@ -145,19 +148,19 @@ public class CSVParserTest extends TestCase {
|
|||
String code =
|
||||
"a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + "; foo;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";foo ;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + "; foo ;", parser.testNextToken());
|
||||
// assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EOF + ";b;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";foo;", parser.testNextToken());
|
||||
assertEquals(EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(TOKEN + "; foo;", parser.testNextToken());
|
||||
assertEquals(EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";foo ;", parser.testNextToken());
|
||||
assertEquals(EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(TOKEN + "; foo ;", parser.testNextToken());
|
||||
// assertEquals(EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(EOF + ";b;", parser.testNextToken());
|
||||
}
|
||||
|
||||
// encapsulator tokenizer (multi line, delimiter in string)
|
||||
|
@ -165,12 +168,12 @@ public class CSVParserTest extends TestCase {
|
|||
String code =
|
||||
"a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EORECORD + ";foo\n baar ,,,;",
|
||||
assertEquals(TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";foo\n;", parser.testNextToken());
|
||||
assertEquals(EORECORD + ";b;", parser.testNextToken());
|
||||
assertEquals(EORECORD + ";foo\n baar ,,,;",
|
||||
parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EOF + ";\n\t \n;", parser.testNextToken());
|
||||
assertEquals(EOF + ";\n\t \n;", parser.testNextToken());
|
||||
|
||||
}
|
||||
|
||||
|
@ -183,9 +186,9 @@ public class CSVParserTest extends TestCase {
|
|||
*/
|
||||
String code = "a;'b and '' more\n'\n!comment;;;;\n;;";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code), new CSVFormat(';', '\'', '!'));
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";a;", parser.testNextToken());
|
||||
assertEquals(
|
||||
CSVParser.TT_EORECORD + ";b and ' more\n;",
|
||||
EORECORD + ";b and ' more\n;",
|
||||
parser.testNextToken());
|
||||
}
|
||||
|
||||
|
@ -209,13 +212,11 @@ public class CSVParserTest extends TestCase {
|
|||
|
||||
public void testGetLine() throws IOException {
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
String[] tmp = null;
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
tmp = parser.getLine();
|
||||
assertTrue(Arrays.equals(res[i], tmp));
|
||||
for (String[] re : res) {
|
||||
assertTrue(Arrays.equals(re, parser.getLine()));
|
||||
}
|
||||
tmp = parser.getLine();
|
||||
assertTrue(tmp == null);
|
||||
|
||||
assertTrue(parser.getLine() == null);
|
||||
}
|
||||
|
||||
public void testGetAllValues() throws IOException {
|
||||
|
@ -282,9 +283,8 @@ public class CSVParserTest extends TestCase {
|
|||
{""}, // Excel format does not ignore empty lines
|
||||
{"world", ""}
|
||||
};
|
||||
String code;
|
||||
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
|
||||
code = codes[codeIndex];
|
||||
|
||||
for (String code : codes) {
|
||||
CSVParser parser = new CSVParser(new StringReader(code), CSVFormat.EXCEL);
|
||||
String[][] tmp = parser.getAllValues();
|
||||
assertEquals(res.length, tmp.length);
|
||||
|
@ -558,11 +558,11 @@ public class CSVParserTest extends TestCase {
|
|||
public void testDelimiterIsWhitespace() throws IOException {
|
||||
String code = "one\ttwo\t\tfour \t five\t six";
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code), CSVFormat.TDF);
|
||||
assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";four;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_TOKEN + ";five;", parser.testNextToken());
|
||||
assertEquals(CSVParser.TT_EOF + ";six;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";one;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";two;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";four;", parser.testNextToken());
|
||||
assertEquals(TOKEN + ";five;", parser.testNextToken());
|
||||
assertEquals(EOF + ";six;", parser.testNextToken());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue