Javadoc improvements, more unit tests, change of API to a chain style, some bugfixes
git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/sandbox/csv/trunk@383468 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
58793330f1
commit
f047581f95
|
@ -37,6 +37,12 @@ import java.util.Vector;
|
||||||
* (new CSVParser(new StringReader("a;b\nc;d"),';')).getAllValues();
|
* (new CSVParser(new StringReader("a;b\nc;d"),';')).getAllValues();
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
|
* <p>The API allows chained method calls, if you like this coding style:</p>
|
||||||
|
* <pre>
|
||||||
|
* String[][] data = (new CSVParser(new StringReader("a;b\nc;d"),';'))
|
||||||
|
* .setExcelStrategy().setIgnoreEmptyLines(true).getAllValues();
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* Internal parser state is completely covered by the strategy
|
* Internal parser state is completely covered by the strategy
|
||||||
* and the reader-state.</p>
|
* and the reader-state.</p>
|
||||||
|
@ -50,9 +56,13 @@ public class CSVParser {
|
||||||
private static final int INITIAL_TOKEN_LENGTH = 50;
|
private static final int INITIAL_TOKEN_LENGTH = 50;
|
||||||
|
|
||||||
// the token types
|
// the token types
|
||||||
|
/** Token has no valid content, i.e. is in its initialized state. */
|
||||||
protected static final int TT_INVALID = -1;
|
protected static final int TT_INVALID = -1;
|
||||||
|
/** Token with content, at beginning or in the middle of a line. */
|
||||||
protected static final int TT_TOKEN = 0;
|
protected static final int TT_TOKEN = 0;
|
||||||
|
/** Token (which can have content) when end of file is reached. */
|
||||||
protected static final int TT_EOF = 1;
|
protected static final int TT_EOF = 1;
|
||||||
|
/** Token with content when end of a line is reached. */
|
||||||
protected static final int TT_EORECORD = 2;
|
protected static final int TT_EORECORD = 2;
|
||||||
|
|
||||||
// the csv definition
|
// the csv definition
|
||||||
|
@ -72,12 +82,13 @@ public class CSVParser {
|
||||||
* It is used as contract between the lexer and the parser.
|
* It is used as contract between the lexer and the parser.
|
||||||
*/
|
*/
|
||||||
class Token {
|
class Token {
|
||||||
// token type see TT_xxx constants
|
/** Token type, see TT_xxx constants. */
|
||||||
int type;
|
int type;
|
||||||
// the content buffer
|
/** The content buffer. */
|
||||||
StringBuffer content;
|
StringBuffer content;
|
||||||
// token ready flag: indicates a valid token (ready for the parser)
|
/** Token ready flag: indicates a valid token with content (ready for the parser). */
|
||||||
boolean isReady;
|
boolean isReady;
|
||||||
|
/** Initializes an empty token. */
|
||||||
Token() {
|
Token() {
|
||||||
content = new StringBuffer(INITIAL_TOKEN_LENGTH);
|
content = new StringBuffer(INITIAL_TOKEN_LENGTH);
|
||||||
type = TT_INVALID;
|
type = TT_INVALID;
|
||||||
|
@ -92,6 +103,7 @@ public class CSVParser {
|
||||||
/**
|
/**
|
||||||
* Parses the given String according to the default CSV strategy.
|
* Parses the given String according to the default CSV strategy.
|
||||||
*
|
*
|
||||||
|
* @param s CSV String to be parsed.
|
||||||
* @return parsed String matrix (which is never null)
|
* @return parsed String matrix (which is never null)
|
||||||
* @throws IOException in case of error
|
* @throws IOException in case of error
|
||||||
* @see #setCSVStrategy()
|
* @see #setCSVStrategy()
|
||||||
|
@ -100,7 +112,13 @@ public class CSVParser {
|
||||||
if (s == null) {
|
if (s == null) {
|
||||||
throw new IllegalArgumentException("Null argument not allowed.");
|
throw new IllegalArgumentException("Null argument not allowed.");
|
||||||
}
|
}
|
||||||
return (new CSVParser(new StringReader(s))).getAllValues();
|
String[][] result = (new CSVParser(new StringReader(s))).getAllValues();
|
||||||
|
if (result == null) {
|
||||||
|
// since CSVStrategy ignores empty lines an empty array is returned
|
||||||
|
// (i.e. not "result = new String[][] {{""}};")
|
||||||
|
result = new String[0][0];
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -109,6 +127,7 @@ public class CSVParser {
|
||||||
* Parsing empty string will be handled as valid records containing zero
|
* Parsing empty string will be handled as valid records containing zero
|
||||||
* elements, so the following property holds: parseLine("").length == 0.
|
* elements, so the following property holds: parseLine("").length == 0.
|
||||||
*
|
*
|
||||||
|
* @param s CSV String to be parsed.
|
||||||
* @return parsed String vector (which is never null)
|
* @return parsed String vector (which is never null)
|
||||||
* @throws IOException in case of error
|
* @throws IOException in case of error
|
||||||
* @see #setCSVStrategy()
|
* @see #setCSVStrategy()
|
||||||
|
@ -166,8 +185,8 @@ public class CSVParser {
|
||||||
* Customized csv parser.
|
* Customized csv parser.
|
||||||
*
|
*
|
||||||
* The parser parses according to the given CSV dialect settings.
|
* The parser parses according to the given CSV dialect settings.
|
||||||
* Leading whitespaces are truncated whereas unicode escapes are
|
* Leading whitespaces are truncated, unicode escapes are
|
||||||
* not interpreted.
|
* not interpreted and empty lines are ignored.
|
||||||
*
|
*
|
||||||
* @param input a Reader based on "csv-formatted" input
|
* @param input a Reader based on "csv-formatted" input
|
||||||
* @param delimiter a Char used for value separation
|
* @param delimiter a Char used for value separation
|
||||||
|
@ -201,6 +220,7 @@ public class CSVParser {
|
||||||
* the stream.
|
* the stream.
|
||||||
*
|
*
|
||||||
* @return matrix of records x values ('null' when end of file)
|
* @return matrix of records x values ('null' when end of file)
|
||||||
|
* @throws IOException on parse error or input read-failure
|
||||||
*/
|
*/
|
||||||
public String[][] getAllValues() throws IOException {
|
public String[][] getAllValues() throws IOException {
|
||||||
Vector records = new Vector();
|
Vector records = new Vector();
|
||||||
|
@ -221,7 +241,7 @@ public class CSVParser {
|
||||||
* and returns the next csv-value as string.
|
* and returns the next csv-value as string.
|
||||||
*
|
*
|
||||||
* @return next value in the input stream ('null' when end of file)
|
* @return next value in the input stream ('null' when end of file)
|
||||||
* @throws IOException
|
* @throws IOException on parse error or input read-failure
|
||||||
*/
|
*/
|
||||||
public String nextValue() throws IOException {
|
public String nextValue() throws IOException {
|
||||||
Token tkn = nextToken();
|
Token tkn = nextToken();
|
||||||
|
@ -266,7 +286,11 @@ public class CSVParser {
|
||||||
record.add(tkn.content.toString());
|
record.add(tkn.content.toString());
|
||||||
break;
|
break;
|
||||||
case TT_EOF:
|
case TT_EOF:
|
||||||
|
if (tkn.isReady) {
|
||||||
|
record.add(tkn.content.toString());
|
||||||
|
} else {
|
||||||
ret = null;
|
ret = null;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case TT_INVALID:
|
case TT_INVALID:
|
||||||
default:
|
default:
|
||||||
|
@ -290,9 +314,8 @@ public class CSVParser {
|
||||||
* number does not correspond to the record-number
|
* number does not correspond to the record-number
|
||||||
*
|
*
|
||||||
* @return current line number
|
* @return current line number
|
||||||
* @throws IOException
|
|
||||||
*/
|
*/
|
||||||
public int getLineNumber() throws IOException {
|
public int getLineNumber() {
|
||||||
return in.getLineNumber();
|
return in.getLineNumber();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -301,15 +324,17 @@ public class CSVParser {
|
||||||
// ======================================================
|
// ======================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the next token
|
* Returns the next token.
|
||||||
*
|
*
|
||||||
* a token coresponds to a term, a record change
|
* A token corresponds to a term, a record change or an
|
||||||
* or and end-of-file indicator
|
* end-of-file indicator.
|
||||||
|
*
|
||||||
|
* @return the next token found
|
||||||
|
* @throws IOException on stream access error
|
||||||
*/
|
*/
|
||||||
protected Token nextToken() throws IOException {
|
protected Token nextToken() throws IOException {
|
||||||
Token tkn = new Token();
|
Token tkn = new Token();
|
||||||
StringBuffer wsBuf = new StringBuffer();
|
StringBuffer wsBuf = new StringBuffer();
|
||||||
// boolean skipEmptyLines = false;
|
|
||||||
|
|
||||||
// get the last read char (required for empty line detection)
|
// get the last read char (required for empty line detection)
|
||||||
int lastChar = in.readAgain();
|
int lastChar = in.readAgain();
|
||||||
|
@ -342,7 +367,7 @@ public class CSVParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
// did we reach eof during the last iteration already ? TT_EOF
|
// did we reach eof during the last iteration already ? TT_EOF
|
||||||
if (isEndOfFile(lastChar)) {
|
if (isEndOfFile(lastChar) || (lastChar != delimiter && isEndOfFile(c))) {
|
||||||
tkn.type = TT_EOF;
|
tkn.type = TT_EOF;
|
||||||
return tkn;
|
return tkn;
|
||||||
}
|
}
|
||||||
|
@ -375,8 +400,7 @@ public class CSVParser {
|
||||||
} else if (isEndOfFile(c)) {
|
} else if (isEndOfFile(c)) {
|
||||||
// end of file return TT_EOF()
|
// end of file return TT_EOF()
|
||||||
tkn.content.append("");
|
tkn.content.append("");
|
||||||
tkn.type = TT_EORECORD;
|
tkn.type = TT_EOF;
|
||||||
// tkn.type = TT_EOF;
|
|
||||||
tkn.isReady = true;
|
tkn.isReady = true;
|
||||||
} else {
|
} else {
|
||||||
// next token must be a simple token
|
// next token must be a simple token
|
||||||
|
@ -417,23 +441,15 @@ public class CSVParser {
|
||||||
tkn.isReady = true;
|
tkn.isReady = true;
|
||||||
} else if (isEndOfFile(c)) {
|
} else if (isEndOfFile(c)) {
|
||||||
// end of file
|
// end of file
|
||||||
// tkn.type = TT_EOF;
|
tkn.type = TT_EOF;
|
||||||
tkn.type = TT_EORECORD;
|
|
||||||
tkn.isReady = true;
|
tkn.isReady = true;
|
||||||
} else if (c == delimiter) {
|
} else if (c == delimiter) {
|
||||||
// end of token
|
// end of token
|
||||||
tkn.type = TT_TOKEN;
|
tkn.type = TT_TOKEN;
|
||||||
tkn.isReady = true;
|
tkn.isReady = true;
|
||||||
} else if (c == '\\') {
|
} else if (c == '\\' && interpretUnicodeEscapes && in.lookAhead() == 'u') {
|
||||||
// handle escaped delimiters (remove escaping)
|
|
||||||
if (in.lookAhead() == this.delimiter) {
|
|
||||||
tkn.content.append((char) in.read());
|
|
||||||
} else if (interpretUnicodeEscapes && in.lookAhead() == 'u') {
|
|
||||||
// interpret unicode escaped chars (like \u0070 -> p)
|
// interpret unicode escaped chars (like \u0070 -> p)
|
||||||
tkn.content.append((char) unicodeEscapeLexer(c));
|
tkn.content.append((char) unicodeEscapeLexer(c));
|
||||||
} else {
|
|
||||||
tkn.content.append((char) c);
|
|
||||||
}
|
|
||||||
} else if (isWhitespace(c)) {
|
} else if (isWhitespace(c)) {
|
||||||
// gather whitespaces
|
// gather whitespaces
|
||||||
// (as long as they are not at the beginning of a token)
|
// (as long as they are not at the beginning of a token)
|
||||||
|
@ -484,7 +500,9 @@ public class CSVParser {
|
||||||
c = in.read();
|
c = in.read();
|
||||||
tkn.content.append((char) c);
|
tkn.content.append((char) c);
|
||||||
} else if (c == '\\' && in.lookAhead() == '\\') {
|
} else if (c == '\\' && in.lookAhead() == '\\') {
|
||||||
// doubled escape character -> add single escape char to stream
|
// doubled escape char, it does not escape itself, only encapsulator
|
||||||
|
// -> add both escape chars to stream
|
||||||
|
tkn.content.append((char) c);
|
||||||
c = in.read();
|
c = in.read();
|
||||||
tkn.content.append((char) c);
|
tkn.content.append((char) c);
|
||||||
} else if (
|
} else if (
|
||||||
|
@ -493,16 +511,18 @@ public class CSVParser {
|
||||||
&& in.lookAhead() == 'u') {
|
&& in.lookAhead() == 'u') {
|
||||||
// interpret unicode escaped chars (like \u0070 -> p)
|
// interpret unicode escaped chars (like \u0070 -> p)
|
||||||
tkn.content.append((char) unicodeEscapeLexer(c));
|
tkn.content.append((char) unicodeEscapeLexer(c));
|
||||||
|
} else if (c == '\\') {
|
||||||
|
// use a single escape character -> add it to stream
|
||||||
|
tkn.content.append((char) c);
|
||||||
} else {
|
} else {
|
||||||
// token finish mark reached: ignore ws till delimiter
|
// token finish mark (encapsulator) reached: ignore whitespace till delimiter
|
||||||
while (!tkn.isReady) {
|
while (!tkn.isReady) {
|
||||||
int n = in.lookAhead();
|
int n = in.lookAhead();
|
||||||
if (n == delimiter) {
|
if (n == delimiter) {
|
||||||
tkn.type = TT_TOKEN;
|
tkn.type = TT_TOKEN;
|
||||||
tkn.isReady = true;
|
tkn.isReady = true;
|
||||||
} else if (isEndOfFile(n)) {
|
} else if (isEndOfFile(n)) {
|
||||||
// tkn.type = TT_EOF;
|
tkn.type = TT_EOF;
|
||||||
tkn.type = TT_EORECORD;
|
|
||||||
tkn.isReady = true;
|
tkn.isReady = true;
|
||||||
} else if (isEndOfLine(n)) {
|
} else if (isEndOfLine(n)) {
|
||||||
// ok eo token reached
|
// ok eo token reached
|
||||||
|
@ -538,11 +558,11 @@ public class CSVParser {
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decodes Unicode escapes
|
* Decodes Unicode escapes.
|
||||||
*
|
*
|
||||||
* Interpretation of "\\uXXXX" escape sequences
|
* Interpretation of "\\uXXXX" escape sequences
|
||||||
* where XXXX is a hex-number
|
* where XXXX is a hex-number.
|
||||||
* @param c
|
* @param c current char which is discarded because it's the "\\" of "\\uXXXX"
|
||||||
* @return the decoded character
|
* @return the decoded character
|
||||||
* @throws IOException on wrong unicode escape sequence or read error
|
* @throws IOException on wrong unicode escape sequence or read error
|
||||||
*/
|
*/
|
||||||
|
@ -576,29 +596,40 @@ public class CSVParser {
|
||||||
* Sets the "Default CSV" settings.
|
* Sets the "Default CSV" settings.
|
||||||
*
|
*
|
||||||
* The default csv settings are relatively restrictive but implement
|
* The default csv settings are relatively restrictive but implement
|
||||||
* something like the "least-common-basis" of CSV.
|
* something like the "least-common-basis" of CSV:
|
||||||
*
|
* <ul>
|
||||||
* Values are separated by ',' (as the C in "CSV"). Complex values must
|
* <li> Delimiter of values is comma ',' (as the C in "CSV") </li>
|
||||||
* be surrounded by '"'. Comments are not supported. Leading whitespaces
|
* <li> Complex values encapsulated by '"' </li>
|
||||||
* are ignored, unicode escapes are not interpreted and empty lines
|
* <li> Comments are not supported </li>
|
||||||
* are skiped.
|
* <li> Leading whitespaces are ignored </li>
|
||||||
|
* <li> Unicode escapes are not interpreted </li>
|
||||||
|
* <li> empty lines are skipped </li>
|
||||||
|
* </ul>
|
||||||
|
* @return current instance of CSVParser to allow chained method calls
|
||||||
*/
|
*/
|
||||||
public void setCSVStrategy() {
|
public CSVParser setCSVStrategy() {
|
||||||
setStrategy(',', '"', (char) 0, true, false, true);
|
setStrategy(',', '"', (char) 0, true, false, true);
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the "Excel CSV" settings.
|
* Sets the "Excel CSV" settings. There are companies out there which
|
||||||
|
* interpret "C" as an abbreviation for "Semicolon". For these companies the
|
||||||
|
* following settings might be appropriate:
|
||||||
|
* <ul>
|
||||||
|
* <li> Delimiter of values is semicolon ';' </li>
|
||||||
|
* <li> Complex values encapsulated by '"' </li>
|
||||||
|
* <li> Comments are not supported </li>
|
||||||
|
* <li> Leading whitespaces are not ignored </li>
|
||||||
|
* <li> Unicode escapes are not interpreted </li>
|
||||||
|
* <li> empty lines are not skipped </li>
|
||||||
|
* </ul>
|
||||||
*
|
*
|
||||||
* There are companies out there which interpret "C" as an abbreviation for
|
* @return current instance of CSVParser to allow chained method calls
|
||||||
* "Semicolon". For these companies the following settings might be
|
|
||||||
* appropriate:
|
|
||||||
* <p>
|
|
||||||
* Delimiter Semicolon ';', Complex-values surrounded by '"', leading
|
|
||||||
* whitespaces are not ignored and unicode escapes are not interpreted.
|
|
||||||
*/
|
*/
|
||||||
public void setExcelStrategy() {
|
public CSVParser setExcelStrategy() {
|
||||||
setStrategy(';', '"', (char) 0, false, false, false);
|
setStrategy(';', '"', (char) 0, false, false, false);
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -612,8 +643,9 @@ public class CSVParser {
|
||||||
* @param interpretUnicodeEscapes TRUE when unicode escapes should be
|
* @param interpretUnicodeEscapes TRUE when unicode escapes should be
|
||||||
* interpreted
|
* interpreted
|
||||||
* @param ignoreEmptyLines TRUE when the parser should skip empty lines
|
* @param ignoreEmptyLines TRUE when the parser should skip empty lines
|
||||||
|
* @return current instance of CSVParser to allow chained method calls
|
||||||
*/
|
*/
|
||||||
public void setStrategy(
|
public CSVParser setStrategy(
|
||||||
char delimiter,
|
char delimiter,
|
||||||
char encapsulator,
|
char encapsulator,
|
||||||
char commentStart,
|
char commentStart,
|
||||||
|
@ -626,15 +658,18 @@ public class CSVParser {
|
||||||
this.setIgnoreLeadingWhitespaces(ignoreLeadingWhitespace);
|
this.setIgnoreLeadingWhitespaces(ignoreLeadingWhitespace);
|
||||||
this.setUnicodeEscapeInterpretation(interpretUnicodeEscapes);
|
this.setUnicodeEscapeInterpretation(interpretUnicodeEscapes);
|
||||||
this.setIgnoreEmptyLines(ignoreEmptyLines);
|
this.setIgnoreEmptyLines(ignoreEmptyLines);
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the desired delimiter
|
* Set the desired delimiter.
|
||||||
*
|
*
|
||||||
* @param c a Char used for value separation
|
* @param c a Char used for value separation
|
||||||
|
* @return current instance of CSVParser to allow chained method calls
|
||||||
*/
|
*/
|
||||||
public void setDelimiter(char c) {
|
public CSVParser setDelimiter(char c) {
|
||||||
this.delimiter = c;
|
this.delimiter = c;
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -647,12 +682,14 @@ public class CSVParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the desired encapsulator
|
* Set the desired encapsulator.
|
||||||
*
|
*
|
||||||
* @param c a Char used as value encapsulation marker
|
* @param c a Char used as value encapsulation marker
|
||||||
|
* @return current instance of CSVParser to allow chained method calls
|
||||||
*/
|
*/
|
||||||
public void setEncapsulator(char c) {
|
public CSVParser setEncapsulator(char c) {
|
||||||
this.encapsulator = c;
|
this.encapsulator = c;
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -665,16 +702,18 @@ public class CSVParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the desired comment start character
|
* Set the desired comment start character.
|
||||||
*
|
*
|
||||||
* @param c a Char used for comment identification
|
* @param c a Char used for comment identification
|
||||||
|
* @return current instance of CSVParser to allow chained method calls
|
||||||
*/
|
*/
|
||||||
public void setCommentStart(char c) {
|
public CSVParser setCommentStart(char c) {
|
||||||
this.commentStart = c;
|
this.commentStart = c;
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the comment identifier
|
* Gets the comment identifier.
|
||||||
*
|
*
|
||||||
* @return the comment identifier character
|
* @return the comment identifier character
|
||||||
*/
|
*/
|
||||||
|
@ -683,16 +722,18 @@ public class CSVParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Enables unicode escape interpretation
|
* Enables unicode escape interpretation.
|
||||||
*
|
*
|
||||||
* @param b TRUE when interpretation should be enabled
|
* @param b TRUE when interpretation should be enabled
|
||||||
|
* @return current instance of CSVParser to allow chained method calls
|
||||||
*/
|
*/
|
||||||
public void setUnicodeEscapeInterpretation(boolean b) {
|
public CSVParser setUnicodeEscapeInterpretation(boolean b) {
|
||||||
this.interpretUnicodeEscapes = b;
|
this.interpretUnicodeEscapes = b;
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shows wether unicode interpretation is enabled
|
* Shows whether unicode interpretation is enabled.
|
||||||
*
|
*
|
||||||
* @return TRUE when unicode interpretation is enabled
|
* @return TRUE when unicode interpretation is enabled
|
||||||
*/
|
*/
|
||||||
|
@ -704,16 +745,18 @@ public class CSVParser {
|
||||||
* Sets the ignore-leading-whitespaces behaviour.
|
* Sets the ignore-leading-whitespaces behaviour.
|
||||||
*
|
*
|
||||||
* Should the lexer ignore leading whitespaces when parsing non
|
* Should the lexer ignore leading whitespaces when parsing non
|
||||||
* encapsulated tokens
|
* encapsulated tokens.
|
||||||
*
|
*
|
||||||
* @param b TRUE when leading whitespaces should be ignored
|
* @param b TRUE when leading whitespaces should be ignored
|
||||||
|
* @return current instance of CSVParser to allow chained method calls
|
||||||
*/
|
*/
|
||||||
public void setIgnoreLeadingWhitespaces(boolean b) {
|
public CSVParser setIgnoreLeadingWhitespaces(boolean b) {
|
||||||
this.ignoreLeadingWhitespaces = b;
|
this.ignoreLeadingWhitespaces = b;
|
||||||
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shows wether unicode interpretation is enabled
|
* Shows whether unicode interpretation is enabled.
|
||||||
*
|
*
|
||||||
* @return TRUE when unicode interpretation is enabled
|
* @return TRUE when unicode interpretation is enabled
|
||||||
*/
|
*/
|
||||||
|
@ -726,10 +769,21 @@ public class CSVParser {
|
||||||
*
|
*
|
||||||
* When set to 'true' empty lines in the input will be ignored.
|
* When set to 'true' empty lines in the input will be ignored.
|
||||||
*
|
*
|
||||||
* @param b
|
* @param b TRUE when empty lines in the input should be ignored
|
||||||
|
* @return current instance of CSVParser to allow chained method calls
|
||||||
*/
|
*/
|
||||||
public void setIgnoreEmptyLines(boolean b) {
|
public CSVParser setIgnoreEmptyLines(boolean b) {
|
||||||
this.ignoreEmptyLines = b;
|
this.ignoreEmptyLines = b;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shows whether empty lines in the input are ignored.
|
||||||
|
*
|
||||||
|
* @return TRUE when empty lines in the input are ignored
|
||||||
|
*/
|
||||||
|
public boolean getIgnoreEmptyLines() {
|
||||||
|
return this.ignoreEmptyLines;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ======================================================
|
// ======================================================
|
||||||
|
|
|
@ -36,12 +36,22 @@ import junit.framework.TestSuite;
|
||||||
public class CSVParserTest extends TestCase {
|
public class CSVParserTest extends TestCase {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TestCSVParser
|
* TestCSVParser.
|
||||||
*/
|
*/
|
||||||
class TestCSVParser extends CSVParser {
|
class TestCSVParser extends CSVParser {
|
||||||
|
/**
|
||||||
|
* Test parser to investigate the type of the internal Token.
|
||||||
|
* @param in a Reader
|
||||||
|
*/
|
||||||
TestCSVParser(Reader in) {
|
TestCSVParser(Reader in) {
|
||||||
super(in);
|
super(in);
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Calls super.nextToken() and prints out a String representation of token
|
||||||
|
* type and content.
|
||||||
|
* @return String representation of token type and content
|
||||||
|
* @throws IOException like {@link CSVParser#nextToken()}
|
||||||
|
*/
|
||||||
public String testNextToken() throws IOException {
|
public String testNextToken() throws IOException {
|
||||||
Token t = super.nextToken();
|
Token t = super.nextToken();
|
||||||
String tmp = Integer.toString(t.type) + ";" + t.content + ";";
|
String tmp = Integer.toString(t.type) + ";" + t.content + ";";
|
||||||
|
@ -51,13 +61,17 @@ public class CSVParserTest extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor for CSVParserTest.
|
* Constructor for JUnit.
|
||||||
* @param arg0
|
* @param name Name to be used in JUnit Test Environment
|
||||||
*/
|
*/
|
||||||
public CSVParserTest(String arg0) {
|
public CSVParserTest(String name) {
|
||||||
super(arg0);
|
super(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a Test suite for JUnit.
|
||||||
|
* @return Test suite for JUnit
|
||||||
|
*/
|
||||||
public static Test suite() {
|
public static Test suite() {
|
||||||
return new TestSuite(CSVParserTest.class);
|
return new TestSuite(CSVParserTest.class);
|
||||||
}
|
}
|
||||||
|
@ -95,23 +109,40 @@ public class CSVParserTest extends TestCase {
|
||||||
public void testSetCSVStrategy() {
|
public void testSetCSVStrategy() {
|
||||||
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||||
// default settings
|
// default settings
|
||||||
assertEquals(parser.getCommentStart(), '\0');
|
|
||||||
assertEquals(parser.getEncapsulator(), '"');
|
|
||||||
assertEquals(parser.getDelimiter(), ',');
|
assertEquals(parser.getDelimiter(), ',');
|
||||||
|
assertEquals(parser.getEncapsulator(), '"');
|
||||||
|
assertEquals(parser.getCommentStart(), '\0');
|
||||||
|
assertEquals(true, parser.getIgnoreLeadingWhitespaces());
|
||||||
|
assertEquals(false, parser.getUnicodeEscapeInterpretation());
|
||||||
|
assertEquals(true, parser.getIgnoreEmptyLines());
|
||||||
// explicit csv settings
|
// explicit csv settings
|
||||||
parser.setCSVStrategy();
|
parser.setCSVStrategy();
|
||||||
assertEquals(parser.getCommentStart(), '\0');
|
|
||||||
assertEquals(parser.getEncapsulator(), '"');
|
|
||||||
assertEquals(parser.getDelimiter(), ',');
|
assertEquals(parser.getDelimiter(), ',');
|
||||||
|
assertEquals(parser.getEncapsulator(), '"');
|
||||||
|
assertEquals(parser.getCommentStart(), '\0');
|
||||||
|
assertEquals(true, parser.getIgnoreLeadingWhitespaces());
|
||||||
|
assertEquals(false, parser.getUnicodeEscapeInterpretation());
|
||||||
|
assertEquals(true, parser.getIgnoreEmptyLines());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSetExcelStrategy() {
|
||||||
|
CSVParser parser = new CSVParser(new StringReader("hello world"));
|
||||||
|
// explicit Excel settings
|
||||||
|
parser.setExcelStrategy();
|
||||||
|
assertEquals(parser.getDelimiter(), ';');
|
||||||
|
assertEquals(parser.getEncapsulator(), '"');
|
||||||
|
assertEquals(parser.getCommentStart(), '\0');
|
||||||
|
assertEquals(false, parser.getIgnoreLeadingWhitespaces());
|
||||||
|
assertEquals(false, parser.getUnicodeEscapeInterpretation());
|
||||||
|
assertEquals(false, parser.getIgnoreEmptyLines());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// ======================================================
|
// ======================================================
|
||||||
// lexer tests
|
// lexer tests
|
||||||
// ======================================================
|
// ======================================================
|
||||||
|
|
||||||
// single line (without comment)
|
// Single line (without comment)
|
||||||
public void testNextToken1() throws IOException {
|
public void testNextToken1() throws IOException {
|
||||||
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
|
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||||
|
@ -126,14 +157,13 @@ public class CSVParserTest extends TestCase {
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";wxy;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";wxy;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";z;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";z;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
|
|
||||||
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
|
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
|
||||||
}
|
}
|
||||||
|
|
||||||
// multiline including comments (and empty lines)
|
// multiline including comments (and empty lines)
|
||||||
public void testNextToken2() throws IOException {
|
public void testNextToken2() throws IOException {
|
||||||
/* file: 1,2,3,
|
/* file: 1,2,3,
|
||||||
* a,b,c
|
* a,b x,c
|
||||||
*
|
*
|
||||||
* # this is a comment
|
* # this is a comment
|
||||||
* d,e,
|
* d,e,
|
||||||
|
@ -172,10 +202,13 @@ public class CSVParserTest extends TestCase {
|
||||||
parser.setCommentStart('#');
|
parser.setCommentStart('#');
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";,;", parser.testNextToken());
|
// an unquoted single backslash is not an escape char
|
||||||
|
assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
|
||||||
|
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";,;", parser.testNextToken());
|
// an unquoted single backslash is not an escape char
|
||||||
assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
|
||||||
|
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
|
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -183,7 +216,7 @@ public class CSVParserTest extends TestCase {
|
||||||
public void testNextToken4() throws IOException {
|
public void testNextToken4() throws IOException {
|
||||||
/* file: a,"foo",b
|
/* file: a,"foo",b
|
||||||
* a, " foo",b
|
* a, " foo",b
|
||||||
* a,"foo " ,b
|
* a,"foo " ,b // whitespace after closing encapsulator
|
||||||
* a, " foo " ,b
|
* a, " foo " ,b
|
||||||
*/
|
*/
|
||||||
String code =
|
String code =
|
||||||
|
@ -202,28 +235,29 @@ public class CSVParserTest extends TestCase {
|
||||||
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_TOKEN + "; foo ;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + "; foo ;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
// assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
|
assertEquals(CSVParser.TT_EOF + ";b;", parser.testNextToken());
|
||||||
}
|
}
|
||||||
|
|
||||||
// encapsulator tokenizer (multi line, delimiter in string)
|
// encapsulator tokenizer (multi line, delimiter in string)
|
||||||
public void testNextToken5() throws IOException {
|
public void testNextToken5() throws IOException {
|
||||||
String code =
|
String code =
|
||||||
"a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\",\"\\\"\",\"\"\"\"";
|
"a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\",\"\\\"\""
|
||||||
|
+ ",\"\\,\""
|
||||||
|
+ ",\"\"\"\"";
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
||||||
parser.setCSVStrategy();
|
parser.setCSVStrategy();
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
|
||||||
assertEquals(
|
assertEquals(CSVParser.TT_EORECORD + ";foo\n baar ,,,;",
|
||||||
CSVParser.TT_EORECORD + ";foo\n baar ,,,;",
|
|
||||||
parser.testNextToken());
|
parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";\n\t \n;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";\n\t \n;", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_TOKEN + ";\";", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";\";", parser.testNextToken());
|
||||||
assertEquals(CSVParser.TT_EORECORD + ";\";", parser.testNextToken());
|
// escape char in quoted input only escapes delimiter
|
||||||
assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
|
assertEquals(CSVParser.TT_TOKEN + ";\\,;", parser.testNextToken());
|
||||||
|
assertEquals(CSVParser.TT_EOF + ";\";", parser.testNextToken());
|
||||||
}
|
}
|
||||||
|
|
||||||
// change delimiters, comment, encapsulator
|
// change delimiters, comment, encapsulator
|
||||||
|
@ -259,11 +293,10 @@ public class CSVParserTest extends TestCase {
|
||||||
{"a", "b", "c", "d"},
|
{"a", "b", "c", "d"},
|
||||||
{"a", "b", "1 2"},
|
{"a", "b", "1 2"},
|
||||||
{"foo baar", "b", ""},
|
{"foo baar", "b", ""},
|
||||||
{"foo\n,,\n\",,\n\"", "d", "e"},
|
{"foo\n,,\n\",,\n\"", "d", "e"}
|
||||||
{""}
|
|
||||||
};
|
};
|
||||||
public void testGetLine() throws IOException {
|
public void testGetLine() throws IOException {
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
String[] tmp = null;
|
String[] tmp = null;
|
||||||
for (int i = 0; i < res.length; i++) {
|
for (int i = 0; i < res.length; i++) {
|
||||||
|
@ -275,7 +308,7 @@ public class CSVParserTest extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testNextValue() throws IOException {
|
public void testNextValue() throws IOException {
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
String tmp = null;
|
String tmp = null;
|
||||||
for (int i = 0; i < res.length; i++) {
|
for (int i = 0; i < res.length; i++) {
|
||||||
|
@ -289,7 +322,7 @@ public class CSVParserTest extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetAllValues() throws IOException {
|
public void testGetAllValues() throws IOException {
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
String[][] tmp = parser.getAllValues();
|
String[][] tmp = parser.getAllValues();
|
||||||
assertEquals(res.length, tmp.length);
|
assertEquals(res.length, tmp.length);
|
||||||
|
@ -299,7 +332,7 @@ public class CSVParserTest extends TestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testExcelStrategyTest() throws IOException {
|
public void testExcelStrategy1() throws IOException {
|
||||||
String code =
|
String code =
|
||||||
"value1;value2;value3;value4\r\na;b;c;d\r\n x;;;"
|
"value1;value2;value3;value4\r\na;b;c;d\r\n x;;;"
|
||||||
+ "\r\n\r\n\"\"\"hello\"\"\";\" \"\"world\"\"\";\"abc\ndef\";\r\n";
|
+ "\r\n\r\n\"\"\"hello\"\"\";\" \"\"world\"\"\";\"abc\ndef\";\r\n";
|
||||||
|
@ -308,10 +341,9 @@ public class CSVParserTest extends TestCase {
|
||||||
{"a", "b", "c", "d"},
|
{"a", "b", "c", "d"},
|
||||||
{" x", "", "", ""},
|
{" x", "", "", ""},
|
||||||
{""},
|
{""},
|
||||||
{"\"hello\"", " \"world\"", "abc\ndef", ""},
|
{"\"hello\"", " \"world\"", "abc\ndef", ""}
|
||||||
{""}
|
|
||||||
};
|
};
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
parser.setExcelStrategy();
|
parser.setExcelStrategy();
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
String[][] tmp = parser.getAllValues();
|
String[][] tmp = parser.getAllValues();
|
||||||
|
@ -322,17 +354,16 @@ public class CSVParserTest extends TestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testExcelStrategyTest2() throws Exception {
|
public void testExcelStrategy2() throws Exception {
|
||||||
String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n";
|
String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n";
|
||||||
String[][] res = {
|
String[][] res = {
|
||||||
{"foo", "baar"},
|
{"foo", "baar"},
|
||||||
{""},
|
{""},
|
||||||
{"hello", ""},
|
{"hello", ""},
|
||||||
{""},
|
{""},
|
||||||
{"world", ""},
|
{"world", ""}
|
||||||
{""}
|
|
||||||
};
|
};
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
parser.setExcelStrategy();
|
parser.setExcelStrategy();
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
String[][] tmp = parser.getAllValues();
|
String[][] tmp = parser.getAllValues();
|
||||||
|
@ -344,7 +375,166 @@ public class CSVParserTest extends TestCase {
|
||||||
}
|
}
|
||||||
assertTrue(Arrays.equals(res[i], tmp[i]));
|
assertTrue(Arrays.equals(res[i], tmp[i]));
|
||||||
}
|
}
|
||||||
//assertTrue(false);
|
}
|
||||||
|
|
||||||
|
public void testEndOfFileBehaviourExcel() throws Exception {
|
||||||
|
String[] codes = {
|
||||||
|
"hello;\r\n\r\nworld;\r\n",
|
||||||
|
"hello;\r\n\r\nworld;",
|
||||||
|
"hello;\r\n\r\nworld;\"\"\r\n",
|
||||||
|
"hello;\r\n\r\nworld;\"\"",
|
||||||
|
"hello;\r\n\r\nworld;\n",
|
||||||
|
"hello;\r\n\r\nworld;",
|
||||||
|
"hello;\r\n\r\nworld;\"\"\n",
|
||||||
|
"hello;\r\n\r\nworld;\"\""
|
||||||
|
};
|
||||||
|
String[][] res = {
|
||||||
|
{"hello", ""},
|
||||||
|
{""}, // ExcelStrategy does not ignore empty lines
|
||||||
|
{"world", ""}
|
||||||
|
};
|
||||||
|
String code;
|
||||||
|
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
|
||||||
|
code = codes[codeIndex];
|
||||||
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
|
parser.setExcelStrategy();
|
||||||
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
|
String[][] tmp = parser.getAllValues();
|
||||||
|
assertEquals(res.length, tmp.length);
|
||||||
|
assertTrue(tmp.length > 0);
|
||||||
|
for (int i = 0; i < res.length; i++) {
|
||||||
|
for (int j = 0; j < tmp[i].length; j++) {
|
||||||
|
System.out.println("'" + tmp[i][j] + "'");
|
||||||
|
}
|
||||||
|
assertTrue(Arrays.equals(res[i], tmp[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEndOfFileBehaviorCSV() throws Exception {
|
||||||
|
String[] codes = {
|
||||||
|
"hello,\r\n\r\nworld,\r\n",
|
||||||
|
"hello,\r\n\r\nworld,",
|
||||||
|
"hello,\r\n\r\nworld,\"\"\r\n",
|
||||||
|
"hello,\r\n\r\nworld,\"\"",
|
||||||
|
"hello,\r\n\r\nworld,\n",
|
||||||
|
"hello,\r\n\r\nworld,",
|
||||||
|
"hello,\r\n\r\nworld,\"\"\n",
|
||||||
|
"hello,\r\n\r\nworld,\"\""
|
||||||
|
};
|
||||||
|
String[][] res = {
|
||||||
|
{"hello", ""}, // CSV Strategy ignores empty lines
|
||||||
|
{"world", ""}
|
||||||
|
};
|
||||||
|
String code;
|
||||||
|
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
|
||||||
|
code = codes[codeIndex];
|
||||||
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
|
parser.setCSVStrategy();
|
||||||
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
|
String[][] tmp = parser.getAllValues();
|
||||||
|
assertEquals(res.length, tmp.length);
|
||||||
|
assertTrue(tmp.length > 0);
|
||||||
|
for (int i = 0; i < res.length; i++) {
|
||||||
|
for (int j = 0; j < tmp[i].length; j++) {
|
||||||
|
System.out.println("'" + tmp[i][j] + "'");
|
||||||
|
}
|
||||||
|
assertTrue(Arrays.equals(res[i], tmp[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEmptyLineBehaviourExcel() throws Exception {
|
||||||
|
String[] codes = {
|
||||||
|
"hello;\r\n\r\n\r\n",
|
||||||
|
"hello;\n\n\n",
|
||||||
|
"hello;\"\"\r\n\r\n\r\n",
|
||||||
|
"hello;\"\"\n\n\n"
|
||||||
|
};
|
||||||
|
String[][] res = {
|
||||||
|
{"hello", ""},
|
||||||
|
{""}, // ExcelStrategy does not ignore empty lines
|
||||||
|
{""}
|
||||||
|
};
|
||||||
|
String code;
|
||||||
|
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
|
||||||
|
code = codes[codeIndex];
|
||||||
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
|
parser.setExcelStrategy();
|
||||||
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
|
String[][] tmp = parser.getAllValues();
|
||||||
|
assertEquals(res.length, tmp.length);
|
||||||
|
assertTrue(tmp.length > 0);
|
||||||
|
for (int i = 0; i < res.length; i++) {
|
||||||
|
for (int j = 0; j < tmp[i].length; j++) {
|
||||||
|
System.out.println("'" + tmp[i][j] + "'");
|
||||||
|
}
|
||||||
|
assertTrue(Arrays.equals(res[i], tmp[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEmptyLineBehaviourCSV() throws Exception {
|
||||||
|
String[] codes = {
|
||||||
|
"hello,\r\n\r\n\r\n",
|
||||||
|
"hello,\n\n\n",
|
||||||
|
"hello,\"\"\r\n\r\n\r\n",
|
||||||
|
"hello,\"\"\n\n\n"
|
||||||
|
};
|
||||||
|
String[][] res = {
|
||||||
|
{"hello", ""} // CSV Strategy ignores empty lines
|
||||||
|
};
|
||||||
|
String code;
|
||||||
|
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
|
||||||
|
code = codes[codeIndex];
|
||||||
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
|
parser.setCSVStrategy();
|
||||||
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
|
String[][] tmp = parser.getAllValues();
|
||||||
|
assertEquals(res.length, tmp.length);
|
||||||
|
assertTrue(tmp.length > 0);
|
||||||
|
for (int i = 0; i < res.length; i++) {
|
||||||
|
for (int j = 0; j < tmp[i].length; j++) {
|
||||||
|
System.out.println("'" + tmp[i][j] + "'");
|
||||||
|
}
|
||||||
|
assertTrue(Arrays.equals(res[i], tmp[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testBackslashEscaping() throws IOException {
|
||||||
|
String code =
|
||||||
|
"one,two,three\n"
|
||||||
|
+ "on\\\"e,two\n"
|
||||||
|
+ "on\"e,two\n"
|
||||||
|
+ "one,\"tw\\\"o\"\n"
|
||||||
|
+ "one,\"t\\,wo\"\n"
|
||||||
|
+ "one,two,\"th,ree\"\n"
|
||||||
|
+ "\"a\\\\\"\n"
|
||||||
|
+ "a\\,b\n"
|
||||||
|
+ "\"a\\\\,b\"";
|
||||||
|
String[][] res = {
|
||||||
|
{ "one", "two", "three" },
|
||||||
|
{ "on\\\"e", "two" },
|
||||||
|
{ "on\"e", "two" },
|
||||||
|
{ "one", "tw\"o" },
|
||||||
|
{ "one", "t\\,wo" }, // backslash in quotes only escapes a delimiter (",")
|
||||||
|
{ "one", "two", "th,ree" },
|
||||||
|
{ "a\\\\" }, // backslash in quotes only escapes a delimiter (",")
|
||||||
|
{ "a\\", "b" }, // a backslash must be returned
|
||||||
|
{ "a\\\\,b" } // backslash in quotes only escapes a delimiter (",")
|
||||||
|
};
|
||||||
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
|
String[][] tmp = parser.getAllValues();
|
||||||
|
assertEquals(res.length, tmp.length);
|
||||||
|
assertTrue(tmp.length > 0);
|
||||||
|
for (int i = 0; i < res.length; i++) {
|
||||||
|
for (int j = 0; j < tmp[i].length; j++) {
|
||||||
|
System.out.println("'" + tmp[i][j] + "'");
|
||||||
|
}
|
||||||
|
assertTrue(Arrays.equals(res[i], tmp[i]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ======================================================
|
// ======================================================
|
||||||
|
@ -386,7 +576,8 @@ public class CSVParserTest extends TestCase {
|
||||||
assertEquals(2, data[0].length);
|
assertEquals(2, data[0].length);
|
||||||
assertEquals(1, data[1].length);
|
assertEquals(1, data[1].length);
|
||||||
assertEquals("abc", data[0][0]);
|
assertEquals("abc", data[0][0]);
|
||||||
assertEquals("def\\nghi", data[0][1]);
|
// an escape char in quotes only escapes a delimiter, not itself
|
||||||
|
assertEquals("def\\\\nghi", data[0][1]);
|
||||||
assertEquals("jkl", data[1][0]);
|
assertEquals("jkl", data[1][0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -402,9 +593,8 @@ public class CSVParserTest extends TestCase {
|
||||||
|
|
||||||
public void testParse6() throws IOException {
|
public void testParse6() throws IOException {
|
||||||
String[][] data = CSVParser.parse("");
|
String[][] data = CSVParser.parse("");
|
||||||
assertEquals(1, data.length);
|
// default strategy is CSV, which ignores empty lines
|
||||||
assertEquals(1, data[0].length);
|
assertEquals(0, data.length);
|
||||||
assertEquals("", data[0][0]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testParse7() throws IOException {
|
public void testParse7() throws IOException {
|
||||||
|
@ -471,7 +661,7 @@ public class CSVParserTest extends TestCase {
|
||||||
|
|
||||||
public void testUnicodeEscape() throws IOException {
|
public void testUnicodeEscape() throws IOException {
|
||||||
String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
|
String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
parser.setUnicodeEscapeInterpretation(true);
|
parser.setUnicodeEscapeInterpretation(true);
|
||||||
String[] data = parser.getLine();
|
String[] data = parser.getLine();
|
||||||
|
@ -482,7 +672,7 @@ public class CSVParserTest extends TestCase {
|
||||||
|
|
||||||
public void testCarriageReturnLineFeedEndings() throws IOException {
|
public void testCarriageReturnLineFeedEndings() throws IOException {
|
||||||
String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
|
String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
String[][] data = parser.getAllValues();
|
String[][] data = parser.getAllValues();
|
||||||
assertEquals(4, data.length);
|
assertEquals(4, data.length);
|
||||||
|
@ -492,7 +682,7 @@ public class CSVParserTest extends TestCase {
|
||||||
String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
|
String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
|
||||||
//String code = "world\r\n\n";
|
//String code = "world\r\n\n";
|
||||||
//String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n";
|
//String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n";
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
String[][] data = parser.getAllValues();
|
String[][] data = parser.getAllValues();
|
||||||
// for (int i = 0; i < data.length; i++) {
|
// for (int i = 0; i < data.length; i++) {
|
||||||
|
@ -509,11 +699,11 @@ public class CSVParserTest extends TestCase {
|
||||||
|
|
||||||
public void testLineTokenConsistency() throws IOException {
|
public void testLineTokenConsistency() throws IOException {
|
||||||
String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
|
String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
|
||||||
TestCSVParser parser = new TestCSVParser(new StringReader(code));
|
CSVParser parser = new CSVParser(new StringReader(code));
|
||||||
System.out.println("---------\n" + code + "\n-------------");
|
System.out.println("---------\n" + code + "\n-------------");
|
||||||
String[][] data = parser.getAllValues();
|
String[][] data = parser.getAllValues();
|
||||||
parser = new TestCSVParser(new StringReader(code));
|
parser = new CSVParser(new StringReader(code));
|
||||||
TestCSVParser parser1 = new TestCSVParser(new StringReader(code));
|
CSVParser parser1 = new CSVParser(new StringReader(code));
|
||||||
for (int i = 0; i < data.length; i++) {
|
for (int i = 0; i < data.length; i++) {
|
||||||
assertTrue(Arrays.equals(parser1.getLine(), data[i]));
|
assertTrue(Arrays.equals(parser1.getLine(), data[i]));
|
||||||
for (int j = 0; j < data[i].length; j++) {
|
for (int j = 0; j < data[i].length; j++) {
|
||||||
|
|
Loading…
Reference in New Issue