diff --git a/src/java/org/apache/commons/csv/CSVParser.java b/src/java/org/apache/commons/csv/CSVParser.java
index 1c20de90..115a8e08 100644
--- a/src/java/org/apache/commons/csv/CSVParser.java
+++ b/src/java/org/apache/commons/csv/CSVParser.java
@@ -28,578 +28,609 @@ import java.util.ArrayList;
*
* Because CSV appears in many different dialects, the parser supports many
* configuration settings by allowing the specification of a {@link CSVStrategy}.
- *
+ *
*
* Parsing of a csv-string having tabs as separators,
* '"' as an optional value encapsulator, and comments starting with '#':
*
- * String[][] data =
+ * String[][] data =
* (new CSVParser(new StringReader("a\tb\nc\td"), new CSVStrategy('\t','"','#'))).getAllValues();
*
- *
+ *
* Parsing of a csv-string in Excel CSV format
*
* String[][] data =
* (new CSVParser(new StringReader("a;b\nc;d"), CSVStrategy.EXCEL_STRATEGY)).getAllValues();
*
- *
+ *
*
* Internal parser state is completely covered by the strategy
* and the reader-state.
- *
- * see package documentation
+ *
+ *
+ * see package documentation
* for more details
*/
public class CSVParser {
- /** length of the initial token (content-)buffer */
- private static final int INITIAL_TOKEN_LENGTH = 50;
-
- // the token types
- /** Token has no valid content, i.e. is in its initialized state. */
- protected static final int TT_INVALID = -1;
- /** Token with content, at beginning or in the middle of a line. */
- protected static final int TT_TOKEN = 0;
- /** Token (which can have content) when end of file is reached. */
- protected static final int TT_EOF = 1;
- /** Token with content when end of a line is reached. */
- protected static final int TT_EORECORD = 2;
+ /**
+ * length of the initial token (content-)buffer
+ */
+ private static final int INITIAL_TOKEN_LENGTH = 50;
- /** Immutable empty String array. */
- private static final String[] EMPTY_STRING_ARRAY = new String[0];
-
- // the input stream
- private final ExtendedBufferedReader in;
+ // the token types
+ /**
+ * Token has no valid content, i.e. is in its initialized state.
+ */
+ protected static final int TT_INVALID = -1;
+ /**
+ * Token with content, at beginning or in the middle of a line.
+ */
+ protected static final int TT_TOKEN = 0;
+ /**
+ * Token (which can have content) when end of file is reached.
+ */
+ protected static final int TT_EOF = 1;
+ /**
+ * Token with content when end of a line is reached.
+ */
+ protected static final int TT_EORECORD = 2;
- private final CSVStrategy strategy;
-
- // the following objects are shared to reduce garbage
- /** A record buffer for getLine(). Grows as necessary and is reused. */
- private final ArrayList record = new ArrayList();
- private final Token reusableToken = new Token();
- private final CharBuffer wsBuf = new CharBuffer();
- private final CharBuffer code = new CharBuffer(4);
+ /**
+ * Immutable empty String array.
+ */
+ private static final String[] EMPTY_STRING_ARRAY = new String[0];
-
- /**
- * Token is an internal token representation.
- *
- * It is used as contract between the lexer and the parser.
- */
- static class Token {
- /** Token type, see TT_xxx constants. */
- int type = TT_INVALID;
- /** The content buffer. */
- CharBuffer content = new CharBuffer(INITIAL_TOKEN_LENGTH);
- /** Token ready flag: indicates a valid token with content (ready for the parser). */
- boolean isReady;
-
- Token reset() {
- content.clear();
- type = TT_INVALID;
- isReady = false;
- return this;
- }
- }
-
- // ======================================================
- // the constructor
- // ======================================================
-
- /**
- * Default strategy for the parser follows the default {@link CSVStrategy}.
- *
- * @param input an InputStream containing "csv-formatted" stream
- * @deprecated use {@link #CSVParser(Reader)}.
- */
- public CSVParser(InputStream input) {
- this(new InputStreamReader(input));
- }
-
- /**
- * CSV parser using the default {@link CSVStrategy}.
- *
- * @param input a Reader containing "csv-formatted" input
- */
- public CSVParser(Reader input) {
- this(input, (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone());
- }
-
- /**
- * Customized value delimiter parser.
- *
- * The parser follows the default {@link CSVStrategy}
- * except for the delimiter setting.
- *
- * @param input a Reader based on "csv-formatted" input
- * @param delimiter a Char used for value separation
- * @deprecated use {@link #CSVParser(Reader,CSVStrategy)}.
- */
- public CSVParser(Reader input, char delimiter) {
- this(input, delimiter, '"', CSVStrategy.COMMENTS_DISABLED);
- }
-
- /**
- * Customized csv parser.
- *
- * The parser parses according to the given CSV dialect settings.
- * Leading whitespaces are truncated, unicode escapes are
- * not interpreted and empty lines are ignored.
- *
- * @param input a Reader based on "csv-formatted" input
- * @param delimiter a Char used for value separation
- * @param encapsulator a Char used as value encapsulation marker
- * @param commentStart a Char used for comment identification
- * @deprecated use {@link #CSVParser(Reader,CSVStrategy)}.
- */
- public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) {
- this(input, new CSVStrategy(delimiter, encapsulator, commentStart));
- }
+ // the input stream
+ private final ExtendedBufferedReader in;
- /**
- * Customized CSV parser using the given {@link CSVStrategy}
- *
- * @param input a Reader containing "csv-formatted" input
- * @param strategy the CSVStrategy used for CSV parsing
- */
- public CSVParser(Reader input, CSVStrategy strategy) {
- this.in = new ExtendedBufferedReader(input);
- this.strategy = strategy;
- }
-
- // ======================================================
- // the parser
- // ======================================================
-
- /**
- * Parses the CSV according to the given strategy
- * and returns the content as an array of records
- * (whereas records are arrays of single values).
- *
- * The returned content starts at the current parse-position in
- * the stream.
- *
- * @return matrix of records x values ('null' when end of file)
- * @throws IOException on parse error or input read-failure
- */
- public String[][] getAllValues() throws IOException {
- ArrayList records = new ArrayList();
- String[] values;
- String[][] ret = null;
- while ((values = getLine()) != null) {
- records.add(values);
+ private final CSVStrategy strategy; // dialect settings queried by the lexer methods
+
+ // the following objects are shared to reduce garbage
+ /**
+ * A record buffer for getLine(). Grows as necessary and is reused.
+ */
+ private final ArrayList record = new ArrayList();
+ private final Token reusableToken = new Token(); // reset and refilled by getLine() for every token
+ private final CharBuffer wsBuf = new CharBuffer(); // leading whitespace collected by nextToken(Token)
+ private final CharBuffer code = new CharBuffer(4); // the four hex digits read by unicodeEscapeLexer(int)
+
+
+ /**
+ * Token is an internal token representation.
+ *
+ * It is used as contract between the lexer and the parser.
+ */
+ static class Token { // plain mutable holder; the lexer methods write its fields directly
+ /**
+ * Token type, one of the TT_xxx constants; TT_INVALID until a lexer sets it.
+ */
+ int type = TT_INVALID;
+ /**
+ * The content buffer, created with the initial length INITIAL_TOKEN_LENGTH.
+ */
+ CharBuffer content = new CharBuffer(INITIAL_TOKEN_LENGTH);
+ /**
+ * Token ready flag: indicates a valid token with content (ready for the parser).
+ */
+ boolean isReady;
+
+ Token reset() { // back to the initial state so the instance can be reused
+ content.clear();
+ type = TT_INVALID;
+ isReady = false;
+ return this; // returned so a call can be nested, as in nextToken(tkn.reset())
+ }
}
- if (records.size() > 0) {
- ret = new String[records.size()][];
- records.toArray(ret);
+
+ // ======================================================
+ // the constructor
+ // ======================================================
+
+ /**
+ * Default strategy for the parser follows the default {@link CSVStrategy}.
+ *
+ * @param input an InputStream containing "csv-formatted" stream
+ * @deprecated use {@link #CSVParser(Reader)}.
+ */
+ public CSVParser(InputStream input) { // deprecated entry point, delegates to CSVParser(Reader)
+ this(new InputStreamReader(input)); // no charset argument: InputStreamReader uses the default charset
}
- return ret;
- }
-
- /**
- * Parses the CSV according to the given strategy
- * and returns the next csv-value as string.
- *
- * @return next value in the input stream ('null' when end of file)
- * @throws IOException on parse error or input read-failure
- */
- public String nextValue() throws IOException {
- Token tkn = nextToken();
- String ret = null;
- switch (tkn.type) {
- case TT_TOKEN:
- case TT_EORECORD:
- ret = tkn.content.toString();
- break;
- case TT_EOF:
- ret = null;
- break;
- case TT_INVALID:
- default:
- // error no token available (or error)
- throw new IOException(
- "(line " + getLineNumber()
- + ") invalid parse sequence");
- // unreachable: break;
+
+ /**
+ * CSV parser using the default {@link CSVStrategy}.
+ *
+ * @param input a Reader containing "csv-formatted" input
+ */
+ public CSVParser(Reader input) { // delegates to CSVParser(Reader, CSVStrategy)
+ this(input, (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone()); // a clone, not the shared DEFAULT_STRATEGY instance itself
}
- return ret;
- }
-
- /**
- * Parses from the current point in the stream til
- * the end of the current line.
- *
- * @return array of values til end of line
- * ('null' when end of file has been reached)
- * @throws IOException on parse error or input read-failure
- */
- public String[] getLine() throws IOException {
- String[] ret = EMPTY_STRING_ARRAY;
- record.clear();
- while (true) {
- reusableToken.reset();
- nextToken(reusableToken);
- switch (reusableToken.type) {
+
+ /**
+ * Customized value delimiter parser.
+ *
+ * The parser follows the default {@link CSVStrategy}
+ * except for the delimiter setting.
+ *
+ * @param input a Reader based on "csv-formatted" input
+ * @param delimiter a Char used for value separation
+ * @deprecated use {@link #CSVParser(Reader, CSVStrategy)}.
+ */
+ public CSVParser(Reader input, char delimiter) { // deprecated, delegates to the four-argument constructor
+ this(input, delimiter, '"', CSVStrategy.COMMENTS_DISABLED); // '"' as encapsulator, COMMENTS_DISABLED as comment start
+ }
+
+ /**
+ * Customized csv parser.
+ *
+ * The parser parses according to the given CSV dialect settings.
+ * Leading whitespaces are truncated, unicode escapes are
+ * not interpreted and empty lines are ignored.
+ *
+ * @param input a Reader based on "csv-formatted" input
+ * @param delimiter a Char used for value separation
+ * @param encapsulator a Char used as value encapsulation marker
+ * @param commentStart a Char used for comment identification
+ * @deprecated use {@link #CSVParser(Reader, CSVStrategy)}.
+ */
+ public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) { // deprecated convenience form
+ this(input, new CSVStrategy(delimiter, encapsulator, commentStart)); // wraps the three characters in a new CSVStrategy
+ }
+
+ /**
+ * Customized CSV parser using the given {@link CSVStrategy}
+ *
+ * @param input a Reader containing "csv-formatted" input
+ * @param strategy the CSVStrategy used for CSV parsing
+ */
+ public CSVParser(Reader input, CSVStrategy strategy) { // the constructor every other constructor ends up in
+ this.in = new ExtendedBufferedReader(input); // wrapped: the lexer relies on lookAhead(), readAgain() and getLineNumber()
+ this.strategy = strategy; // stored as given, neither copied nor null-checked
+ }
+
+ // ======================================================
+ // the parser
+ // ======================================================
+
+ /**
+ * Parses the CSV according to the given strategy
+ * and returns the content as an array of records
+ * (whereas records are arrays of single values).
+ *
+ * The returned content starts at the current parse-position in
+ * the stream.
+ *
+ * @return matrix of records x values ('null' when end of file)
+ * @throws IOException on parse error or input read-failure
+ */
+ public String[][] getAllValues() throws IOException { // drains the input via getLine()
+ ArrayList records = new ArrayList();
+ String[] values;
+ String[][] ret = null; // stays null when not a single record could be read
+ while ((values = getLine()) != null) { // getLine() answers null once the end of the input is reached
+ records.add(values);
+ }
+ if (records.size() > 0) {
+ ret = new String[records.size()][]; // one row per record; rows may differ in length
+ records.toArray(ret);
+ }
+ return ret;
+ }
+
+ /**
+ * Parses the CSV according to the given strategy
+ * and returns the next csv-value as string.
+ *
+ * @return next value in the input stream ('null' when end of file)
+ * @throws IOException on parse error or input read-failure
+ */
+ public String nextValue() throws IOException { // reads exactly one token and maps it to its text
+ Token tkn = nextToken(); // fresh Token per call, unlike getLine() which reuses one
+ String ret = null;
+ switch (tkn.type) {
case TT_TOKEN:
- record.add(reusableToken.content.toString());
- break;
case TT_EORECORD:
- record.add(reusableToken.content.toString());
+ ret = tkn.content.toString(); // TT_TOKEN falls through to here: both types carry a value
break;
case TT_EOF:
- if (reusableToken.isReady) {
- record.add(reusableToken.content.toString());
- } else {
- ret = null;
- }
+ ret = null; // NOTE(review): the lexers can deliver TT_EOF with content (isReady); getLine() keeps it, this drops it - confirm
break;
case TT_INVALID:
default:
- // error: throw IOException
- throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
- // unreachable: break;
- }
- if (reusableToken.type != TT_TOKEN) {
- break;
+ // error: no token available, or an unknown token type
+ throw new IOException(
+ "(line " + getLineNumber()
+ + ") invalid parse sequence");
+ // unreachable: break;
}
+ return ret;
}
- if (!record.isEmpty()) {
- ret = (String[]) record.toArray(new String[record.size()]);
- }
- return ret;
- }
-
- /**
- * Returns the current line number in the input stream.
- *
- * ATTENTION: in case your csv has multiline-values the returned
- * number does not correspond to the record-number
- *
- * @return current line number
- */
- public int getLineNumber() {
- return in.getLineNumber();
- }
-
- // ======================================================
- // the lexer(s)
- // ======================================================
-
- /**
- * Convenience method for nextToken(null).
- */
- protected Token nextToken() throws IOException {
- return nextToken(new Token());
- }
-
- /**
- * Returns the next token.
- *
- * A token corresponds to a term, a record change or an
- * end-of-file indicator.
- *
- * @param tkn an existing Token object to reuse. The caller is responsible to initialize the
- * Token.
- * @return the next token found
- * @throws IOException on stream access error
- */
- protected Token nextToken(Token tkn) throws IOException {
- wsBuf.clear(); // reuse
-
- // get the last read char (required for empty line detection)
- int lastChar = in.readAgain();
-
- // read the next char and set eol
- /* note: unfortunately isEndOfLine may consumes a character silently.
- * this has no effect outside of the method. so a simple workaround
- * is to call 'readAgain' on the stream...
- * uh: might using objects instead of base-types (jdk1.5 autoboxing!)
+
+ /**
+ * Parses from the current point in the stream until
+ * the end of the current line.
+ *
+ * @return array of values until end of line
+ * ('null' when end of file has been reached)
+ * @throws IOException on parse error or input read-failure
*/
- int c = in.read();
- boolean eol = isEndOfLine(c);
- c = in.readAgain();
-
- // empty line detection: eol AND (last char was EOL or beginning)
- while (strategy.getIgnoreEmptyLines() && eol
- && (lastChar == '\n'
- || lastChar == '\r'
- || lastChar == ExtendedBufferedReader.UNDEFINED)
- && !isEndOfFile(lastChar)) {
- // go on char ahead ...
- lastChar = c;
- c = in.read();
- eol = isEndOfLine(c);
- c = in.readAgain();
- // reached end of file without any content (empty line at the end)
- if (isEndOfFile(c)) {
- tkn.type = TT_EOF;
- return tkn;
- }
- }
-
- // did we reach eof during the last iteration already ? TT_EOF
- if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) {
- tkn.type = TT_EOF;
- return tkn;
- }
-
- // important: make sure a new char gets consumed in each iteration
- while (!tkn.isReady && tkn.type != TT_EOF) {
- // ignore whitespaces at beginning of a token
- while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) {
- wsBuf.append((char) c);
- c = in.read();
- eol = isEndOfLine(c);
- }
- // ok, start of token reached: comment, encapsulated, or token
- if (c == strategy.getCommentStart()) {
- // ignore everything till end of line and continue (incr linecount)
- in.readLine();
- tkn = nextToken(tkn.reset());
- } else if (c == strategy.getDelimiter()) {
- // empty token return TT_TOKEN("")
- tkn.type = TT_TOKEN;
- tkn.isReady = true;
- } else if (eol) {
- // empty token return TT_EORECORD("")
- //noop: tkn.content.append("");
- tkn.type = TT_EORECORD;
- tkn.isReady = true;
- } else if (c == strategy.getEncapsulator()) {
- // consume encapsulated token
- encapsulatedTokenLexer(tkn, c);
- } else if (isEndOfFile(c)) {
- // end of file return TT_EOF()
- //noop: tkn.content.append("");
- tkn.type = TT_EOF;
- tkn.isReady = true;
- } else {
- // next token must be a simple token
- // add removed blanks when not ignoring whitespace chars...
- if (!strategy.getIgnoreLeadingWhitespaces()) {
- tkn.content.append(wsBuf);
- }
- simpleTokenLexer(tkn, c);
- }
- }
- return tkn;
- }
-
- /**
- * A simple token lexer
- *
- * Simple token are tokens which are not surrounded by encapsulators.
- * A simple token might contain escaped delimiters (as \, or \;). The
- * token is finished when one of the following conditions become true:
- *
- * - end of line has been reached (TT_EORECORD)
- * - end of stream has been reached (TT_EOF)
- * - an unescaped delimiter has been reached (TT_TOKEN)
- *
- *
- * @param tkn the current token
- * @param c the current character
- * @return the filled token
- *
- * @throws IOException on stream access error
- */
- private Token simpleTokenLexer(Token tkn, int c) throws IOException {
- for (;;) {
- if (isEndOfLine(c)) {
- // end of record
- tkn.type = TT_EORECORD;
- tkn.isReady = true;
- break;
- } else if (isEndOfFile(c)) {
- // end of file
- tkn.type = TT_EOF;
- tkn.isReady = true;
- break;
- } else if (c == strategy.getDelimiter()) {
- // end of token
- tkn.type = TT_TOKEN;
- tkn.isReady = true;
- break;
- } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
- // interpret unicode escaped chars (like \u0070 -> p)
- tkn.content.append((char) unicodeEscapeLexer(c));
- } else if (c == strategy.getEscape()) {
- tkn.content.append((char)readEscape(c));
- } else {
- tkn.content.append((char) c);
- }
-
- c = in.read();
- }
-
- if (strategy.getIgnoreTrailingWhitespaces()) {
- tkn.content.trimTrailingWhitespace();
- }
-
- return tkn;
- }
-
-
- /**
- * An encapsulated token lexer
- *
- * Encapsulated tokens are surrounded by the given encapsulating-string.
- * The encapsulator itself might be included in the token using a
- * doubling syntax (as "", '') or using escaping (as in \", \').
- * Whitespaces before and after an encapsulated token are ignored.
- *
- * @param tkn the current token
- * @param c the current character
- * @return a valid token object
- * @throws IOException on invalid state
- */
- private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException {
- // save current line
- int startLineNumber = getLineNumber();
- // ignore the given delimiter
- // assert c == delimiter;
- for (;;) {
- c = in.read();
-
- if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead()=='u') {
- tkn.content.append((char) unicodeEscapeLexer(c));
- } else if (c == strategy.getEscape()) {
- tkn.content.append((char)readEscape(c));
- } else if (c == strategy.getEncapsulator()) {
- if (in.lookAhead() == strategy.getEncapsulator()) {
- // double or escaped encapsulator -> add single encapsulator to token
- c = in.read();
- tkn.content.append((char) c);
- } else {
- // token finish mark (encapsulator) reached: ignore whitespace till delimiter
- for (;;) {
- c = in.read();
- if (c == strategy.getDelimiter()) {
- tkn.type = TT_TOKEN;
- tkn.isReady = true;
- return tkn;
- } else if (isEndOfFile(c)) {
- tkn.type = TT_EOF;
- tkn.isReady = true;
- return tkn;
- } else if (isEndOfLine(c)) {
- // ok eo token reached
- tkn.type = TT_EORECORD;
- tkn.isReady = true;
- return tkn;
- } else if (!isWhitespace(c)) {
- // error invalid char between token and next delimiter
- throw new IOException(
- "(line " + getLineNumber()
- + ") invalid char between encapsulated token end delimiter"
- );
+ public String[] getLine() throws IOException { // collects tokens until one ends the record or the input
+ String[] ret = EMPTY_STRING_ARRAY; // replaced below by null (plain EOF) or by the collected values
+ record.clear(); // the shared record list is reused across calls
+ while (true) {
+ reusableToken.reset();
+ nextToken(reusableToken);
+ switch (reusableToken.type) {
+ case TT_TOKEN:
+ record.add(reusableToken.content.toString());
+ break;
+ case TT_EORECORD:
+ record.add(reusableToken.content.toString());
+ break;
+ case TT_EOF:
+ if (reusableToken.isReady) { // EOF token that still carries the last value of the input
+ record.add(reusableToken.content.toString());
+ } else {
+ ret = null; // plain EOF without a value
+ }
+ break;
+ case TT_INVALID:
+ default:
+ // error: throw IOException
+ throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
+ // unreachable: break;
+ }
+ if (reusableToken.type != TT_TOKEN) { // anything but a mid-record token ends the loop
+ break;
}
- }
}
- } else if (isEndOfFile(c)) {
- // error condition (end of file before end of token)
- throw new IOException(
- "(startline " + startLineNumber + ")"
- + "eof reached before encapsulated token finished"
- );
- } else {
- // consume character
- tkn.content.append((char) c);
- }
- }
- }
-
-
- /**
- * Decodes Unicode escapes.
- *
- * Interpretation of "\\uXXXX" escape sequences
- * where XXXX is a hex-number.
- * @param c current char which is discarded because it's the "\\" of "\\uXXXX"
- * @return the decoded character
- * @throws IOException on wrong unicode escape sequence or read error
- */
- protected int unicodeEscapeLexer(int c) throws IOException {
- int ret = 0;
- // ignore 'u' (assume c==\ now) and read 4 hex digits
- c = in.read();
- code.clear();
- try {
- for (int i = 0; i < 4; i++) {
- c = in.read();
- if (isEndOfFile(c) || isEndOfLine(c)) {
- throw new NumberFormatException("number too short");
+ if (!record.isEmpty()) { // values collected before EOF win over the null set above
+ ret = (String[]) record.toArray(new String[record.size()]);
}
- code.append((char) c);
- }
- ret = Integer.parseInt(code.toString(), 16);
- } catch (NumberFormatException e) {
- throw new IOException(
- "(line " + getLineNumber() + ") Wrong unicode escape sequence found '"
- + code.toString() + "'" + e.toString());
+ return ret;
}
- return ret;
- }
- private int readEscape(int c) throws IOException {
- // assume c is the escape char (normally a backslash)
- c = in.read();
- int out;
- switch (c) {
- case 'r': out='\r'; break;
- case 'n': out='\n'; break;
- case 't': out='\t'; break;
- case 'b': out='\b'; break;
- case 'f': out='\f'; break;
- default : out=c;
+ /**
+ * Returns the current line number in the input stream.
+ *
+ * ATTENTION: in case your csv has multiline-values the returned
+ * number does not correspond to the record-number
+ *
+ * @return current line number
+ */
+ public int getLineNumber() { // also used by this class to prefix its error messages
+ return in.getLineNumber(); // the count is kept by the ExtendedBufferedReader, not by the parser
}
- return out;
- }
-
- // ======================================================
- // strategies
- // ======================================================
-
- /**
- * Obtain the specified CSV Strategy. This should not be modified.
- *
- * @return strategy currently being used
- */
- public CSVStrategy getStrategy() {
- return this.strategy;
- }
-
- // ======================================================
- // Character class checker
- // ======================================================
-
- /**
- * @return true if the given char is a whitespace character
- */
- private boolean isWhitespace(int c) {
- return Character.isWhitespace((char) c) && (c != strategy.getDelimiter());
- }
-
- /**
- * Greedy - accepts \n, \r and \r\n
- * This checker consumes silently the second control-character...
- *
- * @return true if the given character is a line-terminator
- */
- private boolean isEndOfLine(int c) throws IOException {
- // check if we have \r\n...
- if (c == '\r') {
- if (in.lookAhead() == '\n') {
- // note: does not change c outside of this method !!
+
+ // ======================================================
+ // the lexer(s)
+ // ======================================================
+
+ /**
+ * Convenience method for nextToken(new Token()).
+ */
+ protected Token nextToken() throws IOException { // no-argument form of nextToken(Token)
+ return nextToken(new Token()); // allocates a new Token on every call
+ }
+
+ /**
+ * Returns the next token.
+ *
+ * A token corresponds to a term, a record change or an
+ * end-of-file indicator.
+ *
+ * @param tkn an existing Token object to reuse. The caller is responsible to initialize the
+ * Token.
+ * @return the next token found
+ * @throws IOException on stream access error
+ */
+ protected Token nextToken(Token tkn) throws IOException { // fills tkn and returns it
+ wsBuf.clear(); // reuse
+
+ // get the last read char (required for empty line detection)
+ int lastChar = in.readAgain();
+
+ // read the next char and set eol
+ /* note: unfortunately isEndOfLine may consume a character silently.
+ * this has no effect outside of the method. so a simple workaround
+ * is to call 'readAgain' on the stream...
+ * uh: using objects instead of base-types might help (jdk1.5 autoboxing!)
+ */
+ int c = in.read();
+ boolean eol = isEndOfLine(c);
+ c = in.readAgain(); // re-sync c with the stream after the possible silent read
+
+ // empty line detection: eol AND (last char was EOL or beginning)
+ while (strategy.getIgnoreEmptyLines() && eol
+ && (lastChar == '\n'
+ || lastChar == '\r'
+ || lastChar == ExtendedBufferedReader.UNDEFINED)
+ && !isEndOfFile(lastChar)) {
+ // go one char ahead ...
+ lastChar = c;
+ c = in.read();
+ eol = isEndOfLine(c);
+ c = in.readAgain();
+ // reached end of file without any content (empty line at the end)
+ if (isEndOfFile(c)) {
+ tkn.type = TT_EOF; // isReady stays false: there is no value to deliver
+ return tkn;
+ }
+ }
+
+ // did we reach eof during the last iteration already ? TT_EOF
+ if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) {
+ tkn.type = TT_EOF; // a delimiter right before EOF falls through instead, yielding one more empty value
+ return tkn;
+ }
+
+ // important: make sure a new char gets consumed in each iteration
+ while (!tkn.isReady && tkn.type != TT_EOF) {
+ // ignore whitespaces at beginning of a token
+ while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) {
+ wsBuf.append((char) c);
+ c = in.read();
+ eol = isEndOfLine(c);
+ }
+ // ok, start of token reached: comment, encapsulated, or token
+ if (c == strategy.getCommentStart()) {
+ // ignore everything till end of line and continue (incr linecount)
+ in.readLine();
+ tkn = nextToken(tkn.reset()); // recurse to tokenize whatever follows the comment line
+ } else if (c == strategy.getDelimiter()) {
+ // empty token return TT_TOKEN("")
+ tkn.type = TT_TOKEN;
+ tkn.isReady = true;
+ } else if (eol) {
+ // empty token return TT_EORECORD("")
+ //noop: tkn.content.append("");
+ tkn.type = TT_EORECORD;
+ tkn.isReady = true;
+ } else if (c == strategy.getEncapsulator()) {
+ // consume encapsulated token
+ encapsulatedTokenLexer(tkn, c);
+ } else if (isEndOfFile(c)) {
+ // end of file return TT_EOF()
+ //noop: tkn.content.append("");
+ tkn.type = TT_EOF;
+ tkn.isReady = true;
+ } else {
+ // next token must be a simple token
+ // add removed blanks when not ignoring whitespace chars...
+ if (!strategy.getIgnoreLeadingWhitespaces()) {
+ tkn.content.append(wsBuf); // NOTE(review): wsBuf is only filled when leading whitespace IS ignored, so this looks like a no-op - confirm
+ }
+ simpleTokenLexer(tkn, c);
+ }
+ }
+ return tkn;
+ }
+
+ /**
+ * A simple token lexer
+ *
+ * Simple token are tokens which are not surrounded by encapsulators.
+ * A simple token might contain escaped delimiters (as \, or \;). The
+ * token is finished when one of the following conditions become true:
+ *
+ * - end of line has been reached (TT_EORECORD)
+ * - end of stream has been reached (TT_EOF)
+ * - an unescaped delimiter has been reached (TT_TOKEN)
+ *
+ *
+ * @param tkn the current token
+ * @param c the current character
+ * @return the filled token
+ * @throws IOException on stream access error
+ */
+ private Token simpleTokenLexer(Token tkn, int c) throws IOException { // c is the first character of the value
+ for (; ;) {
+ if (isEndOfLine(c)) {
+ // end of record
+ tkn.type = TT_EORECORD;
+ tkn.isReady = true;
+ break;
+ } else if (isEndOfFile(c)) {
+ // end of file: the token is still ready, so a last value without line terminator keeps its content
+ tkn.type = TT_EOF;
+ tkn.isReady = true;
+ break;
+ } else if (c == strategy.getDelimiter()) {
+ // end of token
+ tkn.type = TT_TOKEN;
+ tkn.isReady = true;
+ break;
+ } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
+ // interpret unicode escaped chars (like \u0070 -> p)
+ tkn.content.append((char) unicodeEscapeLexer(c));
+ } else if (c == strategy.getEscape()) { // tested after the unicode branch, which therefore wins when both match
+ tkn.content.append((char) readEscape(c));
+ } else {
+ tkn.content.append((char) c); // ordinary character
+ }
+
+ c = in.read(); // the terminating character itself is consumed but never appended
+ }
+
+ if (strategy.getIgnoreTrailingWhitespaces()) { // runs for all three ways out of the loop
+ tkn.content.trimTrailingWhitespace();
+ }
+
+ return tkn;
+ }
+
+
+ /**
+ * An encapsulated token lexer
+ *
+ * Encapsulated tokens are surrounded by the given encapsulating-string.
+ * The encapsulator itself might be included in the token using a
+ * doubling syntax (as "", '') or using escaping (as in \", \').
+ * Whitespaces before and after an encapsulated token are ignored.
+ *
+ * @param tkn the current token
+ * @param c the current character
+ * @return a valid token object
+ * @throws IOException on invalid state
+ */
+ private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException { // called with c == opening encapsulator
+ // save current line (reported if the closing encapsulator is never found)
+ int startLineNumber = getLineNumber();
+ // ignore the opening encapsulator
+ // assert c == encapsulator;
+ for (; ;) {
+ c = in.read();
+
+ if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
+ tkn.content.append((char) unicodeEscapeLexer(c));
+ } else if (c == strategy.getEscape()) {
+ tkn.content.append((char) readEscape(c));
+ } else if (c == strategy.getEncapsulator()) {
+ if (in.lookAhead() == strategy.getEncapsulator()) {
+ // doubled encapsulator -> add a single encapsulator to the token
+ c = in.read();
+ tkn.content.append((char) c);
+ } else {
+ // token finish mark (encapsulator) reached: ignore whitespace till delimiter
+ for (; ;) {
+ c = in.read();
+ if (c == strategy.getDelimiter()) {
+ tkn.type = TT_TOKEN;
+ tkn.isReady = true;
+ return tkn;
+ } else if (isEndOfFile(c)) {
+ tkn.type = TT_EOF;
+ tkn.isReady = true;
+ return tkn;
+ } else if (isEndOfLine(c)) {
+ // ok, end of token reached
+ tkn.type = TT_EORECORD;
+ tkn.isReady = true;
+ return tkn;
+ } else if (!isWhitespace(c)) {
+ // error: invalid char between closing encapsulator and next delimiter
+ throw new IOException(
+ "(line " + getLineNumber()
+ + ") invalid char between encapsulated token end delimiter"
+ );
+ }
+ }
+ }
+ } else if (isEndOfFile(c)) {
+ // error condition (end of file before end of token)
+ throw new IOException(
+ "(startline " + startLineNumber + ")"
+ + "eof reached before encapsulated token finished"
+ );
+ } else {
+ // consume character (line terminators included, so a value may span lines)
+ tkn.content.append((char) c);
+ }
+ }
+ }
+
+
+ /**
+ * Decodes Unicode escapes.
+ *
+ * Interpretation of "\\uXXXX" escape sequences
+ * where XXXX is a hex-number.
+ *
+ * @param c current char which is discarded because it's the "\\" of "\\uXXXX"
+ * @return the decoded character
+ * @throws IOException on wrong unicode escape sequence or read error
+ */
+ protected int unicodeEscapeLexer(int c) throws IOException { // c is only used as a scratch variable from here on
+ int ret = 0;
+ // ignore 'u' (assume c==\ now) and read 4 hex digits
c = in.read();
- }
+ code.clear(); // shared buffer, emptied before every use
+ try {
+ for (int i = 0; i < 4; i++) {
+ c = in.read();
+ if (isEndOfFile(c) || isEndOfLine(c)) {
+ throw new NumberFormatException("number too short"); // converted to IOException by the catch below
+ }
+ code.append((char) c);
+ }
+ ret = Integer.parseInt(code.toString(), 16); // NOTE(review): parseInt accepts a leading sign, so "-123" is not rejected here - confirm
+ } catch (NumberFormatException e) {
+ throw new IOException(
+ "(line " + getLineNumber() + ") Wrong unicode escape sequence found '"
+ + code.toString() + "'" + e.toString());
+ }
+ return ret;
+ }
+
+ private int readEscape(int c) throws IOException { // maps the character following the escape char
+ // assume c is the escape char (normally a backslash)
+ c = in.read(); // the escape char is dropped; only the character after it matters
+ int out;
+ switch (c) {
+ case 'r':
+ out = '\r';
+ break;
+ case 'n':
+ out = '\n';
+ break;
+ case 't':
+ out = '\t';
+ break;
+ case 'b':
+ out = '\b';
+ break;
+ case 'f':
+ out = '\f';
+ break;
+ default:
+ out = c; // any other character stands for itself; NOTE(review): at end of input this is END_OF_STREAM, which callers cast to char - confirm
+ }
+ return out;
+ }
+
+ // ======================================================
+ // strategies
+ // ======================================================
+
+ /**
+ * Obtain the specified CSV Strategy. This should not be modified.
+ *
+ * @return strategy currently being used
+ */
+ public CSVStrategy getStrategy() { // plain accessor
+ return this.strategy; // the parser's own instance, not a copy
+ }
+
+ // ======================================================
+ // Character class checker
+ // ======================================================
+
+ /**
+ * @return true if the given char is a whitespace character
+ */
+ private boolean isWhitespace(int c) { // whitespace test used while skipping padding around values
+ return Character.isWhitespace((char) c) && (c != strategy.getDelimiter()); // a whitespace delimiter (e.g. tab) never counts as padding
+ }
+
+ /**
+ * Greedy - accepts \n, \r and \r\n
+ * This checker consumes silently the second control-character...
+ *
+ * @return true if the given character is a line-terminator
+ */
+ private boolean isEndOfLine(int c) throws IOException { // not a pure test: may advance the reader by one char
+ // check if we have \r\n...
+ if (c == '\r') {
+ if (in.lookAhead() == '\n') {
+ // note: does not change c outside of this method !!
+ c = in.read(); // swallow the '\n' so the pair counts as one terminator; nextToken(Token) re-syncs via in.readAgain()
+ }
+ }
+ return (c == '\n' || c == '\r');
+ }
+
+ /**
+ * @return true if the given character indicates end of file
+ */
+ private boolean isEndOfFile(int c) { // c is a value obtained from the ExtendedBufferedReader
+ return c == ExtendedBufferedReader.END_OF_STREAM; // compared against the reader's end-of-stream constant
}
- return (c == '\n' || c == '\r');
- }
-
- /**
- * @return true if the given character indicates end of file
- */
- private boolean isEndOfFile(int c) {
- return c == ExtendedBufferedReader.END_OF_STREAM;
- }
}
diff --git a/src/java/org/apache/commons/csv/CSVPrinter.java b/src/java/org/apache/commons/csv/CSVPrinter.java
index 2193c11e..fd2ebc26 100644
--- a/src/java/org/apache/commons/csv/CSVPrinter.java
+++ b/src/java/org/apache/commons/csv/CSVPrinter.java
@@ -26,282 +26,289 @@ import java.io.Writer;
*/
public class CSVPrinter {
- /** The place that the values get written. */
- protected final Writer out;
- protected final CSVStrategy strategy;
+ /**
+ * The place that the values get written.
+ */
+ protected final Writer out;
+ protected final CSVStrategy strategy;
- /** True if we just began a new line. */
- protected boolean newLine = true;
+ /**
+ * True if we just began a new line.
+ */
+ protected boolean newLine = true;
- protected char[] buf = new char[0]; // temporary buffer
+ protected char[] buf = new char[0]; // temporary buffer
- /**
- * Create a printer that will print values to the given
- * stream following the CSVStrategy.
- *
- * Currently, only a pure encapsulation strategy or a pure escaping strategy
- * is supported. Hybrid strategies (encapsulation and escaping with a different character) are not supported.
- *
- * @param out stream to which to print.
- * @param strategy describes the CSV variation.
- */
- public CSVPrinter(Writer out, CSVStrategy strategy) {
- this.out = out;
- this.strategy = strategy==null ? CSVStrategy.DEFAULT_STRATEGY : strategy;
- }
-
- // ======================================================
- // printing implementation
- // ======================================================
-
- /**
- * Output a blank line
- */
- public void println() throws IOException {
- out.write(strategy.getPrinterNewline());
- newLine = true;
- }
-
- public void flush() throws IOException {
- out.flush();
- }
-
-
- /**
- * Print a single line of comma separated values.
- * The values will be quoted if needed. Quotes and
- * newLine characters will be escaped.
- *
- * @param values values to be outputted.
- */
- public void println(String[] values) throws IOException {
- for (int i = 0; i < values.length; i++) {
- print(values[i]);
- }
- println();
- }
-
-
- /**
- * Put a comment among the comma separated values.
- * Comments will always begin on a new line and occupy a
- * least one full line. The character specified to star
- * comments and a space will be inserted at the beginning of
- * each new line in the comment.
- *
- * @param comment the comment to output
- */
- public void printlnComment(String comment) throws IOException {
- if(this.strategy.isCommentingDisabled()) {
- return;
- }
- if (!newLine) {
- println();
- }
- out.write(this.strategy.getCommentStart());
- out.write(' ');
- for (int i = 0; i < comment.length(); i++) {
- char c = comment.charAt(i);
- switch (c) {
- case '\r' :
- if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') {
- i++;
- }
- // break intentionally excluded.
- case '\n' :
- println();
- out.write(this.strategy.getCommentStart());
- out.write(' ');
- break;
- default :
- out.write(c);
- break;
- }
- }
- println();
- }
-
-
- public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException {
- if (!checkForEscape) {
- printSep();
- out.write(value, offset, len);
- return;
+ /**
+ * Create a printer that will print values to the given
+ * stream following the CSVStrategy.
+ *
+ * Currently, only a pure encapsulation strategy or a pure escaping strategy
+ * is supported. Hybrid strategies (encapsulation and escaping with a different character) are not supported.
+ *
+ * @param out stream to which to print.
+ * @param strategy describes the CSV variation.
+ */
+ public CSVPrinter(Writer out, CSVStrategy strategy) {
+ this.out = out;
+ this.strategy = strategy == null ? CSVStrategy.DEFAULT_STRATEGY : strategy;
}
- if (strategy.getEncapsulator() != CSVStrategy.ENCAPSULATOR_DISABLED) {
- printAndEncapsulate(value, offset, len);
- } else if (strategy.getEscape() != CSVStrategy.ESCAPE_DISABLED) {
- printAndEscape(value, offset, len);
- } else {
- printSep();
- out.write(value, offset, len);
+ // ======================================================
+ // printing implementation
+ // ======================================================
+
+ /**
+ * Output a blank line
+ */
+ public void println() throws IOException {
+ out.write(strategy.getPrinterNewline());
+ newLine = true;
}
- }
- void printSep() throws IOException {
- if (newLine) {
- newLine = false;
- } else {
- out.write(this.strategy.getDelimiter());
+ public void flush() throws IOException {
+ out.flush();
}
- }
- void printAndEscape(char[] value, int offset, int len) throws IOException {
- int start = offset;
- int pos = offset;
- int end = offset + len;
- printSep();
-
- char delim = this.strategy.getDelimiter();
- char escape = this.strategy.getEscape();
-
- while (pos < end) {
- char c = value[pos];
- if (c == '\r' || c=='\n' || c==delim || c==escape) {
- // write out segment up until this char
- int l = pos-start;
- if (l>0) {
- out.write(value, start, l);
+ /**
+ * Print a single line of comma separated values.
+ * The values will be quoted if needed. Quotes and
+ * newLine characters will be escaped.
+ *
+ * @param values values to be outputted.
+ */
+ public void println(String[] values) throws IOException {
+ for (int i = 0; i < values.length; i++) {
+ print(values[i]);
}
- if (c=='\n') c='n';
- else if (c=='\r') c='r';
-
- out.write(escape);
- out.write(c);
-
- start = pos+1; // start on the current char after this one
- }
-
- pos++;
+ println();
}
- // write last segment
- int l = pos-start;
- if (l>0) {
- out.write(value, start, l);
+
+ /**
+ * Put a comment among the comma separated values.
+ * Comments will always begin on a new line and occupy at
+ * least one full line. The character specified to start
+ * comments and a space will be inserted at the beginning of
+ * each new line in the comment.
+ *
+ * @param comment the comment to output
+ */
+ public void printlnComment(String comment) throws IOException {
+ if (this.strategy.isCommentingDisabled()) {
+ return;
+ }
+ if (!newLine) {
+ println();
+ }
+ out.write(this.strategy.getCommentStart());
+ out.write(' ');
+ for (int i = 0; i < comment.length(); i++) {
+ char c = comment.charAt(i);
+ switch (c) {
+ case '\r':
+ if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') {
+ i++;
+ }
+ // break intentionally excluded.
+ case '\n':
+ println();
+ out.write(this.strategy.getCommentStart());
+ out.write(' ');
+ break;
+ default:
+ out.write(c);
+ break;
+ }
+ }
+ println();
}
- }
- void printAndEncapsulate(char[] value, int offset, int len) throws IOException {
- boolean first = newLine; // is this the first value on this line?
- boolean quote = false;
- int start = offset;
- int pos = offset;
- int end = offset + len;
- printSep();
+ public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException {
+ if (!checkForEscape) {
+ printSep();
+ out.write(value, offset, len);
+ return;
+ }
- char delim = this.strategy.getDelimiter();
- char encapsulator = this.strategy.getEncapsulator();
+ if (strategy.getEncapsulator() != CSVStrategy.ENCAPSULATOR_DISABLED) {
+ printAndEncapsulate(value, offset, len);
+ } else if (strategy.getEscape() != CSVStrategy.ESCAPE_DISABLED) {
+ printAndEscape(value, offset, len);
+ } else {
+ printSep();
+ out.write(value, offset, len);
+ }
+ }
- if (len <= 0) {
- // always quote an empty token that is the first
- // on the line, as it may be the only thing on the
- // line. If it were not quoted in that case,
- // an empty line has no tokens.
- if (first) {
- quote = true;
- }
- } else {
- char c = value[pos];
+ void printSep() throws IOException {
+ if (newLine) {
+ newLine = false;
+ } else {
+ out.write(this.strategy.getDelimiter());
+ }
+ }
+
+ void printAndEscape(char[] value, int offset, int len) throws IOException {
+ int start = offset;
+ int pos = offset;
+ int end = offset + len;
+
+ printSep();
+
+ char delim = this.strategy.getDelimiter();
+ char escape = this.strategy.getEscape();
- // Hmmm, where did this rule come from?
- if (first
- && (c < '0'
- || (c > '9' && c < 'A')
- || (c > 'Z' && c < 'a')
- || (c > 'z'))) {
- quote = true;
- // } else if (c == ' ' || c == '\f' || c == '\t') {
- } else if (c <= '#') {
- // Some other chars at the start of a value caused the parser to fail, so for now
- // encapsulate if we start in anything less than '#'. We are being conservative
- // by including the default comment char too.
- quote = true;
- } else {
while (pos < end) {
- c = value[pos];
- if (c=='\n' || c=='\r' || c==encapsulator || c==delim) {
- quote = true;
- break;
- }
- pos++;
+ char c = value[pos];
+ if (c == '\r' || c == '\n' || c == delim || c == escape) {
+ // write out segment up until this char
+ int l = pos - start;
+ if (l > 0) {
+ out.write(value, start, l);
+ }
+ if (c == '\n') {
+ c = 'n';
+ } else if (c == '\r') {
+ c = 'r';
+ }
+
+ out.write(escape);
+ out.write(c);
+
+ start = pos + 1; // start on the current char after this one
+ }
+
+ pos++;
+ }
+
+ // write last segment
+ int l = pos - start;
+ if (l > 0) {
+ out.write(value, start, l);
+ }
+ }
+
+ void printAndEncapsulate(char[] value, int offset, int len) throws IOException {
+ boolean first = newLine; // is this the first value on this line?
+ boolean quote = false;
+ int start = offset;
+ int pos = offset;
+ int end = offset + len;
+
+ printSep();
+
+ char delim = this.strategy.getDelimiter();
+ char encapsulator = this.strategy.getEncapsulator();
+
+ if (len <= 0) {
+ // always quote an empty token that is the first
+ // on the line, as it may be the only thing on the
+ // line. If it were not quoted in that case,
+ // an empty line has no tokens.
+ if (first) {
+ quote = true;
+ }
+ } else {
+ char c = value[pos];
+
+ // Hmmm, where did this rule come from?
+ if (first
+ && (c < '0'
+ || (c > '9' && c < 'A')
+ || (c > 'Z' && c < 'a')
+ || (c > 'z'))) {
+ quote = true;
+ // } else if (c == ' ' || c == '\f' || c == '\t') {
+ } else if (c <= '#') {
+ // Some other chars at the start of a value caused the parser to fail, so for now
+ // encapsulate if we start in anything less than '#'. We are being conservative
+ // by including the default comment char too.
+ quote = true;
+ } else {
+ while (pos < end) {
+ c = value[pos];
+ if (c == '\n' || c == '\r' || c == encapsulator || c == delim) {
+ quote = true;
+ break;
+ }
+ pos++;
+ }
+
+ if (!quote) {
+ pos = end - 1;
+ c = value[pos];
+ // if (c == ' ' || c == '\f' || c == '\t') {
+ // Some other chars at the end caused the parser to fail, so for now
+ // encapsulate if we end in anything less than ' '
+ if (c <= ' ') {
+ quote = true;
+ }
+ }
+ }
}
if (!quote) {
- pos = end-1;
- c = value[pos];
- // if (c == ' ' || c == '\f' || c == '\t') {
- // Some other chars at the end caused the parser to fail, so for now
- // encapsulate if we end in anything less than ' '
- if (c <= ' ') {
- quote = true;
- }
+ // no encapsulation needed - write out the original value
+ out.write(value, offset, len);
+ return;
}
- }
+
+ // we hit something that needed encapsulation
+ out.write(encapsulator);
+
+ // Pick up where we left off: pos should be positioned on the first character that caused
+ // the need for encapsulation.
+ while (pos < end) {
+ char c = value[pos];
+ if (c == encapsulator) {
+ // write out the chunk up until this point
+
+ // add 1 to the length to write out the encapsulator also
+ out.write(value, start, pos - start + 1);
+ // put the next starting position on the encapsulator so we will
+ // write it out again with the next string (effectively doubling it)
+ start = pos;
+ }
+ pos++;
+ }
+
+ // write the last segment
+ out.write(value, start, pos - start);
+ out.write(encapsulator);
}
- if (!quote) {
- // no encapsulation needed - write out the original value
- out.write(value, offset, len);
- return;
+ /**
+ * Print the string as the next value on the line. The value
+ * will be escaped or encapsulated as needed if checkForEscape==true
+ *
+ * @param value value to be outputted.
+ */
+ public void print(String value, boolean checkForEscape) throws IOException {
+ if (!checkForEscape) {
+ // write directly from string
+ printSep();
+ out.write(value);
+ return;
+ }
+
+ if (buf.length < value.length()) {
+ buf = new char[value.length()];
+ }
+
+ value.getChars(0, value.length(), buf, 0);
+ print(buf, 0, value.length(), checkForEscape);
}
- // we hit something that needed encapsulation
- out.write(encapsulator);
-
- // Pick up where we left off: pos should be positioned on the first character that caused
- // the need for encapsulation.
- while (posCSVUtils
instances should NOT be constructed in
- * standard programming.
+ * standard programming.
*
* This constructor is public to permit tools that require a JavaBean
* instance to operate.
*/
public CSVUtils() {
}
-
+
/**
* Converts an array of string values into a single CSV line. All
* null
values are converted to the string "null"
,
@@ -46,13 +46,13 @@ public class CSVUtils {
*
* @param values the value array
* @return the CSV string, will be an empty string if the length of the
- * value array is 0
+ * value array is 0
*/
public static String printLine(String[] values, CSVStrategy strategy) {
// set up a CSVUtils
StringWriter stringWriter = new StringWriter();
CSVPrinter csvPrinter = new CSVPrinter(stringWriter, strategy);
-
+
// check for null values an "null" as strings and convert them
// into the strings "null" and "\"null\""
for (int i = 0; i < values.length; i++) {
@@ -62,60 +62,60 @@ public class CSVUtils {
values[i] = "\"null\"";
}
}
-
+
// convert to CSV
try {
- csvPrinter.println(values);
+ csvPrinter.println(values);
} catch (IOException e) {
- // should not happen with StringWriter
+ // should not happen with StringWriter
}
// as the resulting string has \r\n at the end, we will trim that away
return stringWriter.toString().trim();
}
-
- // ======================================================
- // static parsers
- // ======================================================
-
- /**
- * Parses the given String according to the default {@link CSVStrategy}.
- *
- * @param s CSV String to be parsed.
- * @return parsed String matrix (which is never null)
- * @throws IOException in case of error
- */
- public static String[][] parse(String s) throws IOException {
- if (s == null) {
- throw new IllegalArgumentException("Null argument not allowed.");
+
+ // ======================================================
+ // static parsers
+ // ======================================================
+
+ /**
+ * Parses the given String according to the default {@link CSVStrategy}.
+ *
+ * @param s CSV String to be parsed.
+ * @return parsed String matrix (which is never null)
+ * @throws IOException in case of error
+ */
+ public static String[][] parse(String s) throws IOException {
+ if (s == null) {
+ throw new IllegalArgumentException("Null argument not allowed.");
+ }
+ String[][] result = (new CSVParser(new StringReader(s))).getAllValues();
+ if (result == null) {
+ // since CSVStrategy ignores empty lines an empty array is returned
+ // (i.e. not "result = new String[][] {{""}};")
+ result = EMPTY_DOUBLE_STRING_ARRAY;
+ }
+ return result;
}
- String[][] result = (new CSVParser(new StringReader(s))).getAllValues();
- if (result == null) {
- // since CSVStrategy ignores empty lines an empty array is returned
- // (i.e. not "result = new String[][] {{""}};")
- result = EMPTY_DOUBLE_STRING_ARRAY;
+
+ /**
+ * Parses the first line only according to the default {@link CSVStrategy}.
+ *
+ * Parsing empty string will be handled as valid records containing zero
+ * elements, so the following property holds: parseLine("").length == 0.
+ *
+ * @param s CSV String to be parsed.
+ * @return parsed String vector (which is never null)
+ * @throws IOException in case of error
+ */
+ public static String[] parseLine(String s) throws IOException {
+ if (s == null) {
+ throw new IllegalArgumentException("Null argument not allowed.");
+ }
+ // uh,jh: make sure that parseLine("").length == 0
+ if (s.length() == 0) {
+ return EMPTY_STRING_ARRAY;
+ }
+ return (new CSVParser(new StringReader(s))).getLine();
}
- return result;
- }
-
- /**
- * Parses the first line only according to the default {@link CSVStrategy}.
- *
- * Parsing empty string will be handled as valid records containing zero
- * elements, so the following property holds: parseLine("").length == 0.
- *
- * @param s CSV String to be parsed.
- * @return parsed String vector (which is never null)
- * @throws IOException in case of error
- */
- public static String[] parseLine(String s) throws IOException {
- if (s == null) {
- throw new IllegalArgumentException("Null argument not allowed.");
- }
- // uh,jh: make sure that parseLine("").length == 0
- if (s.length() == 0) {
- return EMPTY_STRING_ARRAY;
- }
- return (new CSVParser(new StringReader(s))).getLine();
- }
-
+
}
diff --git a/src/java/org/apache/commons/csv/CharBuffer.java b/src/java/org/apache/commons/csv/CharBuffer.java
index d1ccc801..94bffebb 100644
--- a/src/java/org/apache/commons/csv/CharBuffer.java
+++ b/src/java/org/apache/commons/csv/CharBuffer.java
@@ -19,11 +19,11 @@
package org.apache.commons.csv;
/**
- * A simple StringBuffer replacement that aims to
+ * A simple StringBuffer replacement that aims to
* reduce copying as much as possible. The buffer
* grows as necessary.
* This class is not thread safe.
- *
+ *
* @author Ortwin Glïż½ck
*/
public class CharBuffer {
@@ -31,21 +31,21 @@ public class CharBuffer {
private char[] c;
/**
- * Actually used number of characters in the array.
+ * Actually used number of characters in the array.
* It is also the index at which
- * a new character will be inserted into c
.
- */
+ * a new character will be inserted into c
.
+ */
private int length;
-
+
/**
* Creates a new CharBuffer with an initial capacity of 32 characters.
*/
public CharBuffer() {
this(32);
}
-
+
/**
- * Creates a new CharBuffer with an initial capacity
+ * Creates a new CharBuffer with an initial capacity
* of length
characters.
*/
public CharBuffer(final int length) {
@@ -54,16 +54,17 @@ public class CharBuffer {
}
this.c = new char[length];
}
-
+
/**
* Empties the buffer. The capacity still remains the same, so no memory is freed.
*/
public void clear() {
length = 0;
}
-
+
/**
* Returns the number of characters in the buffer.
+ *
* @return the number of characters
*/
public int length() {
@@ -72,16 +73,18 @@ public class CharBuffer {
/**
* Returns the current capacity of the buffer.
+ *
* @return the maximum number of characters that can be stored in this buffer without
- * resizing it.
+ * resizing it.
*/
public int capacity() {
return c.length;
}
-
+
/**
* Appends the contents of cb
to the end of this CharBuffer.
+ *
* @param cb the CharBuffer to append or null
*/
public void append(final CharBuffer cb) {
@@ -92,10 +95,11 @@ public class CharBuffer {
System.arraycopy(cb.c, 0, c, length, cb.length);
length += cb.length;
}
-
+
/**
* Appends s
to the end of this CharBuffer.
* This method involves copying the new data once!
+ *
* @param s the String to append or null
*/
public void append(final String s) {
@@ -104,10 +108,11 @@ public class CharBuffer {
}
append(s.toCharArray());
}
-
+
/**
* Appends sb
to the end of this CharBuffer.
* This method involves copying the new data once!
+ *
* @param sb the StringBuffer to append or null
*/
public void append(final StringBuffer sb) {
@@ -118,10 +123,11 @@ public class CharBuffer {
sb.getChars(0, sb.length(), c, length);
length += sb.length();
}
-
+
/**
* Appends data
to the end of this CharBuffer.
* This method involves copying the new data once!
+ *
* @param data the char[] to append or null
*/
public void append(final char[] data) {
@@ -132,10 +138,11 @@ public class CharBuffer {
System.arraycopy(data, 0, c, length, data.length);
length += data.length;
}
-
+
/**
* Appends a single character to the end of this CharBuffer.
* This method involves copying the new data once!
+ *
* @param data the char to append
*/
public void append(final char data) {
@@ -143,7 +150,7 @@ public class CharBuffer {
c[length] = data;
length++;
}
-
+
/**
* Shrinks the capacity of the buffer to the current length if necessary.
* This method involves copying the data once!
@@ -157,13 +164,13 @@ public class CharBuffer {
c = newc;
}
- /**
- * Removes trailing whitespace.
- */
+ /**
+ * Removes trailing whitespace.
+ */
public void trimTrailingWhitespace() {
- while (length>0 && Character.isWhitespace(c[length-1])) {
- length--;
- }
+ while (length > 0 && Character.isWhitespace(c[length - 1])) {
+ length--;
+ }
}
/**
@@ -172,6 +179,7 @@ public class CharBuffer {
* modifying it.
* This method allows to avoid copying if the caller knows the exact capacity
* before.
+ *
* @return
*/
public char[] getCharacters() {
@@ -183,16 +191,17 @@ public class CharBuffer {
return chars;
}
- /**
- * Returns the character at the specified position.
- */
+ /**
+ * Returns the character at the specified position.
+ */
public char charAt(int pos) {
- return c[pos];
- }
+ return c[pos];
+ }
/**
* Converts the contents of the buffer into a StringBuffer.
* This method involves copying the new data once!
+ *
* @return
*/
public StringBuffer toStringBuffer() {
@@ -200,25 +209,27 @@ public class CharBuffer {
sb.append(c, 0, length);
return sb;
}
-
+
/**
* Converts the contents of the buffer into a StringBuffer.
* This method involves copying the new data once!
+ *
* @return
*/
public String toString() {
return new String(c, 0, length);
}
-
+
/**
* Copies the data into a new array of at least capacity
size.
+ *
* @param capacity
*/
public void provideCapacity(final int capacity) {
if (c.length >= capacity) {
return;
}
- int newcapacity = ((capacity*3)>>1) + 1;
+ int newcapacity = ((capacity * 3) >> 1) + 1;
char[] newc = new char[newcapacity];
System.arraycopy(c, 0, newc, 0, length);
c = newc;
diff --git a/src/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 1b60f155..0323856a 100644
--- a/src/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -23,214 +23,223 @@ import java.io.Reader;
/**
* ExtendedBufferedReader
*
- * A special reader decorater which supports more
+ * A special reader decorator which supports more
* sophisticated access to the underlying reader object.
- *
+ *
* In particular the reader supports a look-ahead option,
* which allows you to see the next char returned by
* next().
- *
*/
-class ExtendedBufferedReader extends BufferedReader {
+class ExtendedBufferedReader extends BufferedReader {
-
- /** the end of stream symbol */
- public static final int END_OF_STREAM = -1;
- /** undefined state for the lookahead char */
- public static final int UNDEFINED = -2;
-
- /** the lookahead chars */
- private int lookaheadChar = UNDEFINED;
- /** the last char returned */
- private int lastChar = UNDEFINED;
- /** the line counter */
- private int lineCounter = 0;
- private CharBuffer line = new CharBuffer();
-
- /**
- * Created extended buffered reader using default buffer-size
- *
- */
- public ExtendedBufferedReader(Reader r) {
- super(r);
- /* note uh: do not fetch the first char here,
- * because this might block the method!
+
+ /**
+ * the end of stream symbol
*/
- }
-
- /**
- * Create extended buffered reader using the given buffer-size
- */
- public ExtendedBufferedReader(Reader r, int bufSize) {
- super(r, bufSize);
- /* note uh: do not fetch the first char here,
- * because this might block the method!
+ public static final int END_OF_STREAM = -1;
+ /**
+ * undefined state for the lookahead char
*/
- }
-
- /**
- * Reads the next char from the input stream.
- * @return the next char or END_OF_STREAM if end of stream has been reached.
- */
- public int read() throws IOException {
- // initalize the lookahead
- if (lookaheadChar == UNDEFINED) {
- lookaheadChar = super.read();
+ public static final int UNDEFINED = -2;
+
+ /**
+ * the lookahead chars
+ */
+ private int lookaheadChar = UNDEFINED;
+ /**
+ * the last char returned
+ */
+ private int lastChar = UNDEFINED;
+ /**
+ * the line counter
+ */
+ private int lineCounter = 0;
+ private CharBuffer line = new CharBuffer();
+
+ /**
+ * Creates an extended buffered reader using the default buffer-size
+ */
+ public ExtendedBufferedReader(Reader r) {
+ super(r);
+ /* note uh: do not fetch the first char here,
+ * because this might block the method!
+ */
}
- lastChar = lookaheadChar;
- if (super.ready()) {
- lookaheadChar = super.read();
- } else {
- lookaheadChar = UNDEFINED;
+
+ /**
+ * Creates an extended buffered reader using the given buffer-size
+ */
+ public ExtendedBufferedReader(Reader r, int bufSize) {
+ super(r, bufSize);
+ /* note uh: do not fetch the first char here,
+ * because this might block the method!
+ */
}
- if (lastChar == '\n') {
- lineCounter++;
- }
- return lastChar;
- }
-
- /**
- * Returns the last read character again.
- *
- * @return the last read char or UNDEFINED
- */
- public int readAgain() {
- return lastChar;
- }
-
- /**
- * Non-blocking reading of len chars into buffer buf starting
- * at bufferposition off.
- *
- * performs an iteratative read on the underlying stream
- * as long as the following conditions hold:
- * - less than len chars have been read
- * - end of stream has not been reached
- * - next read is not blocking
- *
- * @return nof chars actually read or END_OF_STREAM
- */
- public int read(char[] buf, int off, int len) throws IOException {
- // do not claim if len == 0
- if (len == 0) {
- return 0;
- }
-
- // init lookahead, but do not block !!
- if (lookaheadChar == UNDEFINED) {
- if (ready()) {
- lookaheadChar = super.read();
+
+ /**
+ * Reads the next char from the input stream.
+ *
+ * @return the next char or END_OF_STREAM if end of stream has been reached.
+ */
+ public int read() throws IOException {
+ // initialize the lookahead
+ if (lookaheadChar == UNDEFINED) {
+ lookaheadChar = super.read();
+ }
+ lastChar = lookaheadChar;
+ if (super.ready()) {
+ lookaheadChar = super.read();
} else {
- return -1;
+ lookaheadChar = UNDEFINED;
+ }
+ if (lastChar == '\n') {
+ lineCounter++;
+ }
+ return lastChar;
+ }
+
+ /**
+ * Returns the last read character again.
+ *
+ * @return the last read char or UNDEFINED
+ */
+ public int readAgain() {
+ return lastChar;
+ }
+
+ /**
+ * Non-blocking reading of len chars into buffer buf starting
+ * at buffer position off.
+ *
+ * performs an iterative read on the underlying stream
+ * as long as the following conditions hold:
+ * - less than len chars have been read
+ * - end of stream has not been reached
+ * - next read is not blocking
+ *
+ * @return number of chars actually read or END_OF_STREAM
+ */
+ public int read(char[] buf, int off, int len) throws IOException {
+ // do not claim if len == 0
+ if (len == 0) {
+ return 0;
+ }
+
+ // init lookahead, but do not block !!
+ if (lookaheadChar == UNDEFINED) {
+ if (ready()) {
+ lookaheadChar = super.read();
+ } else {
+ return -1;
+ }
+ }
+ // 'first read of underlying stream'
+ if (lookaheadChar == -1) {
+ return -1;
+ }
+ // continue until the lookaheadChar would block
+ int cOff = off;
+ while (len > 0 && ready()) {
+ if (lookaheadChar == -1) {
+ // eof stream reached, do not continue
+ return cOff - off;
+ } else {
+ buf[cOff++] = (char) lookaheadChar;
+ if (lookaheadChar == '\n') {
+ lineCounter++;
+ }
+ lastChar = lookaheadChar;
+ lookaheadChar = super.read();
+ len--;
+ }
+ }
+ return cOff - off;
+ }
+
+ /**
+ * @return A String containing the contents of the line, not
+ * including any line-termination characters, or null
+ * if the end of the stream has been reached
+ */
+ public String readLine() throws IOException {
+
+ if (lookaheadChar == UNDEFINED) {
+ lookaheadChar = super.read();
+ }
+
+ line.clear(); //reuse
+
+ // return null if end of stream has been reached
+ if (lookaheadChar == END_OF_STREAM) {
+ return null;
+ }
+ // do we have a line termination already
+ char laChar = (char) lookaheadChar;
+ if (laChar == '\n' || laChar == '\r') {
+ lastChar = lookaheadChar;
+ lookaheadChar = super.read();
+ // ignore '\r\n' as well
+ if ((char) lookaheadChar == '\n') {
+ lastChar = lookaheadChar;
+ lookaheadChar = super.read();
+ }
+ lineCounter++;
+ return line.toString();
+ }
+
+ // create the rest-of-line return and update the lookahead
+ line.append(laChar);
+ String restOfLine = super.readLine(); // TODO involves copying
+ lastChar = lookaheadChar;
+ lookaheadChar = super.read();
+ if (restOfLine != null) {
+ line.append(restOfLine);
+ }
+ lineCounter++;
+ return line.toString();
+ }
+
+ /**
+ * Unsupported
+ */
+ public long skip(long n) throws IllegalArgumentException, IOException {
+ throw new UnsupportedOperationException("CSV has no reason to implement this");
+ }
+
+ /**
+ * Returns the next char in the stream without consuming it.
+ *
+ * Remember the next char read by read(..) will always be
+ * identical to lookAhead().
+ *
+ * @return the next char (without consuming it) or END_OF_STREAM
+ */
+ public int lookAhead() throws IOException {
+ if (lookaheadChar == UNDEFINED) {
+ lookaheadChar = super.read();
+ }
+ return lookaheadChar;
+ }
+
+
+ /**
+ * Returns the number of lines read
+ *
+ * @return the current-line-number (or -1)
+ */
+ public int getLineNumber() {
+ if (lineCounter > -1) {
+ return lineCounter;
+ } else {
+ return -1;
}
}
- // 'first read of underlying stream'
- if (lookaheadChar == -1) {
- return -1;
- }
- // continue until the lookaheadChar would block
- int cOff = off;
- while (len > 0 && ready()) {
- if (lookaheadChar == -1) {
- // eof stream reached, do not continue
- return cOff - off;
- } else {
- buf[cOff++] = (char) lookaheadChar;
- if (lookaheadChar == '\n') {
- lineCounter++;
- }
- lastChar = lookaheadChar;
- lookaheadChar = super.read();
- len--;
- }
- }
- return cOff - off;
- }
-
- /**
- * @return A String containing the contents of the line, not
- * including any line-termination characters, or null
- * if the end of the stream has been reached
- */
- public String readLine() throws IOException {
-
- if (lookaheadChar == UNDEFINED) {
- lookaheadChar = super.read();
- }
-
- line.clear(); //reuse
-
- // return null if end of stream has been reached
- if (lookaheadChar == END_OF_STREAM) {
- return null;
- }
- // do we have a line termination already
- char laChar = (char) lookaheadChar;
- if (laChar == '\n' || laChar == '\r') {
- lastChar = lookaheadChar;
- lookaheadChar = super.read();
- // ignore '\r\n' as well
- if ((char) lookaheadChar == '\n') {
- lastChar = lookaheadChar;
- lookaheadChar = super.read();
- }
- lineCounter++;
- return line.toString();
- }
-
- // create the rest-of-line return and update the lookahead
- line.append(laChar);
- String restOfLine = super.readLine(); // TODO involves copying
- lastChar = lookaheadChar;
- lookaheadChar = super.read();
- if (restOfLine != null) {
- line.append(restOfLine);
- }
- lineCounter++;
- return line.toString();
- }
-
- /**
- * Unsupported
- */
- public long skip(long n) throws IllegalArgumentException, IOException {
- throw new UnsupportedOperationException("CSV has no reason to implement this");
- }
-
- /**
- * Returns the next char in the stream without consuming it.
- *
- * Remember the next char read by read(..) will always be
- * identical to lookAhead().
- *
- * @return the next char (without consuming it) or END_OF_STREAM
- */
- public int lookAhead() throws IOException {
- if (lookaheadChar == UNDEFINED) {
- lookaheadChar = super.read();
- }
- return lookaheadChar;
- }
-
-
- /**
- * Returns the nof line read
- *
- * @return the current-line-number (or -1)
- */
- public int getLineNumber() {
- if (lineCounter > -1) {
- return lineCounter;
- } else {
- return -1;
- }
- }
- /**
- * Unsupported
- */
- public boolean markSupported() {
- throw new UnsupportedOperationException("CSV has no reason to implement this");
- }
-
+ /**
+ * Unsupported
+ */
+ public boolean markSupported() {
+ throw new UnsupportedOperationException("CSV has no reason to implement this");
+ }
+
}
diff --git a/src/java/org/apache/commons/csv/writer/CSVConfig.java b/src/java/org/apache/commons/csv/writer/CSVConfig.java
index ffcc62a1..74c816c7 100644
--- a/src/java/org/apache/commons/csv/writer/CSVConfig.java
+++ b/src/java/org/apache/commons/csv/writer/CSVConfig.java
@@ -32,69 +32,100 @@ import java.util.List;
*/
public class CSVConfig {
- /** specifies if it is a fixed width csv file **/
+ /**
+ * specifies if it is a fixed width csv file
+ */
private boolean fixedWidth;
- /** list of fields **/
+ /**
+ * list of fields
+ */
private List fields;
- /** Do no do any filling **/
- public static final int FILLNONE = 0;
- /** Fill content the the left. Mainly usable together with fixedWidth **/
- public static final int FILLLEFT = 1;
- /** Fill content to the right. Mainly usable together with fixedWidth **/
- public static final int FILLRIGHT = 2;
-
- /** The fill pattern */
- private int fill;
- /** The fill char. Defaults to a space */
- private char fillChar = ' ';
- /** The seperator character. Defaults to , */
- private char delimiter = ',';
- /** The row separator. Defaults to \n */
- private String rowDelimiter = "\n";
- /** Should we ignore the delimiter. Defaults to false */
- private boolean ignoreDelimiter = false;
- /** the value delimiter. Defaults to " */
- private char valueDelimiter = '"';
- /** Should we ignore the value delimiter. Defaults to true */
- private boolean ignoreValueDelimiter = true;
- /** Specifies if we want to use a field header */
- private boolean fieldHeader = false;
- /** Specifies if the end of the line needs to be trimmed */
- private boolean endTrimmed = false;
/**
- *
+ * Do not do any filling
+ */
+ public static final int FILLNONE = 0;
+ /**
+ * Fill content to the left. Mainly usable together with fixedWidth
+ */
+ public static final int FILLLEFT = 1;
+ /**
+ * Fill content to the right. Mainly usable together with fixedWidth
+ */
+ public static final int FILLRIGHT = 2;
+
+ /**
+ * The fill pattern
+ */
+ private int fill;
+ /**
+ * The fill char. Defaults to a space
+ */
+ private char fillChar = ' ';
+ /**
+ * The separator character. Defaults to ,
+ */
+ private char delimiter = ',';
+ /**
+ * The row separator. Defaults to \n
+ */
+ private String rowDelimiter = "\n";
+ /**
+ * Should we ignore the delimiter. Defaults to false
+ */
+ private boolean ignoreDelimiter = false;
+ /**
+ * the value delimiter. Defaults to "
+ */
+ private char valueDelimiter = '"';
+ /**
+ * Should we ignore the value delimiter. Defaults to true
+ */
+ private boolean ignoreValueDelimiter = true;
+ /**
+ * Specifies if we want to use a field header
+ */
+ private boolean fieldHeader = false;
+ /**
+ * Specifies if the end of the line needs to be trimmed
+ */
+ private boolean endTrimmed = false;
+
+ /**
+ *
*/
public CSVConfig() {
super();
}
-
+
/**
* @return if the CSV file is fixedWidth
*/
public boolean isFixedWidth() {
return fixedWidth;
}
-
+
/**
* Specify if the CSV file is fixed width.
* Defaults to false
+ *
* @param fixedWidth the fixedwidth
*/
public void setFixedWidth(boolean fixedWidth) {
this.fixedWidth = fixedWidth;
}
-
+
public void addField(CSVField field) {
if (fields == null) {
fields = new ArrayList();
}
fields.add(field);
}
-
+
/**
* Set the fields that should be used by the writer.
* This will overwrite currently added fields completely!
+ *
* @param csvFields the csvfields array. If null it will do nothing
*/
public void setFields(CSVField[] csvFields) {
@@ -103,9 +134,10 @@ public class CSVConfig {
}
fields = new ArrayList(Arrays.asList(csvFields));
}
-
+
/**
* Set the fields that should be used by the writer
+ *
* @param csvField a collection with fields. If null it will do nothing
*/
public void setFields(Collection csvField) {
@@ -125,12 +157,12 @@ public class CSVConfig {
}
return csvFields;
}
-
+
public CSVField getField(String name) {
if (fields == null || name == null) {
return null;
}
- for(int i = 0; i < fields.size(); i++) {
+ for (int i = 0; i < fields.size(); i++) {
CSVField field = (CSVField) fields.get(i);
if (name.equals(field.getName())) {
return field;
@@ -149,6 +181,7 @@ public class CSVConfig {
/**
* Set the fill pattern. Defaults to {@link #FILLNONE}
*
Other options are : {@link #FILLLEFT} and {@link #FILLRIGHT}
+ *
* @param fill the fill pattern.
*/
public void setFill(int fill) {
@@ -156,7 +189,6 @@ public class CSVConfig {
}
/**
- *
* @return the fillchar. Defaults to a space.
*/
public char getFillChar() {
@@ -165,6 +197,7 @@ public class CSVConfig {
/**
* Set the fill char
+ *
* @param fillChar the fill char
*/
public void setFillChar(char fillChar) {
@@ -180,6 +213,7 @@ public class CSVConfig {
/**
* Set the delimiter to use
+ *
* @param delimiter the delimiter character.
*/
public void setDelimiter(char delimiter) {
@@ -195,6 +229,7 @@ public class CSVConfig {
/**
* Set the rowDelimiter to use
+ *
* @param rowDelimiter the row delimiter character.
*/
public void setRowDelimiter(String rowDelimiter) {
@@ -209,7 +244,8 @@ public class CSVConfig {
}
/**
- * Specify if the writer should ignore the delimiter.
+ * Specify if the writer should ignore the delimiter.
+ *
* @param ignoreDelimiter defaults to false.
*/
public void setIgnoreDelimiter(boolean ignoreDelimiter) {
@@ -225,6 +261,7 @@ public class CSVConfig {
/**
* Set the value delimiter to use
+ *
* @param valueDelimiter the value delimiter character.
*/
public void setValueDelimiter(char valueDelimiter) {
@@ -240,7 +277,8 @@ public class CSVConfig {
}
/**
- * Specify if the writer should ignore the value delimiter.
+ * Specify if the writer should ignore the value delimiter.
+ *
* @param ignoreValueDelimiter defaults to false.
*/
public void setIgnoreValueDelimiter(boolean ignoreValueDelimiter) {
@@ -253,16 +291,19 @@ public class CSVConfig {
public boolean isFieldHeader() {
return fieldHeader;
}
+
/**
* Specify if you want to use a field header.
+ *
* @param fieldHeader true or false.
*/
public void setFieldHeader(boolean fieldHeader) {
this.fieldHeader = fieldHeader;
}
-
+
/**
* TODO..
+ *
* @see java.lang.Object#equals(java.lang.Object)
*/
public boolean equals(Object obj) {
@@ -278,8 +319,9 @@ public class CSVConfig {
/**
* Creates a config based on a stream. It tries to guess
* NOTE : The stream will be closed.
- * @param inputStream the inputstream.
- * @return the guessed config.
+ *
+ * @param inputStream the inputstream.
+ * @return the guessed config.
*/
public static CSVConfig guessConfig(InputStream inputStream) {
return null;
@@ -294,11 +336,12 @@ public class CSVConfig {
/**
* Specify if the end of the line needs to be trimmed. Defaults to false.
+ *
* @param endTrimmed
*/
public void setEndTrimmed(boolean endTrimmed) {
this.endTrimmed = endTrimmed;
}
-
+
}
diff --git a/src/java/org/apache/commons/csv/writer/CSVConfigGuesser.java b/src/java/org/apache/commons/csv/writer/CSVConfigGuesser.java
index e811f8b7..2b30686b 100644
--- a/src/java/org/apache/commons/csv/writer/CSVConfigGuesser.java
+++ b/src/java/org/apache/commons/csv/writer/CSVConfigGuesser.java
@@ -30,23 +30,27 @@ import java.io.InputStreamReader;
*/
public class CSVConfigGuesser {
- /** The stream to read */
+ /**
+ * The stream to read
+ */
private InputStream in;
- /**
+ /**
* if the file has a field header (need this info, to be able to guess better)
* Defaults to false
*/
private boolean hasFieldHeader = false;
- /** The found config */
- protected CSVConfig config;
-
/**
- *
+ * The found config
+ */
+ protected CSVConfig config;
+
+ /**
+ *
*/
public CSVConfigGuesser() {
this.config = new CSVConfig();
}
-
+
/**
* @param in the inputstream to guess from
*/
@@ -54,23 +58,24 @@ public class CSVConfigGuesser {
this();
setInputStream(in);
}
-
+
public void setInputStream(InputStream in) {
this.in = in;
}
-
+
/**
* Allow override.
+ *
* @return the inputstream that was set.
*/
protected InputStream getInputStream() {
return in;
}
-
+
/**
- * Guess the config based on the first 10 (or less when less available)
+ * Guess the config based on the first 10 (or less when less available)
* records of a CSV file.
- *
+ *
* @return the guessed config.
*/
public CSVConfig guess() {
@@ -80,7 +85,7 @@ public class CSVConfigGuesser {
String[] lines = new String[10];
String line = null;
int counter = 0;
- while ( (line = bIn.readLine()) != null && counter <= 10) {
+ while ((line = bIn.readLine()) != null && counter <= 10) {
lines[counter] = line;
counter++;
}
@@ -91,13 +96,13 @@ public class CSVConfigGuesser {
lines = newLines;
}
analyseLines(lines);
- } catch(Exception e) {
+ } catch (Exception e) {
e.printStackTrace();
} finally {
if (in != null) {
try {
in.close();
- } catch(Exception e) {
+ } catch (Exception e) {
// ignore exception.
}
}
@@ -107,15 +112,16 @@ public class CSVConfigGuesser {
config = null;
return conf;
}
-
+
protected void analyseLines(String[] lines) {
guessFixedWidth(lines);
guessFieldSeperator(lines);
}
-
+
/**
* Guess if this file is fixedwidth.
* Just basing the fact on all lines being of the same length
+ *
* @param lines
*/
protected void guessFixedWidth(String[] lines) {
@@ -132,7 +138,7 @@ public class CSVConfigGuesser {
}
}
}
-
+
protected void guessFieldSeperator(String[] lines) {
if (config.isFixedWidth()) {
@@ -142,7 +148,7 @@ public class CSVConfigGuesser {
for (int i = 0; i < lines.length; i++) {
}
}
-
+
protected void guessFixedWidthSeperator(String[] lines) {
// keep track of the fieldlength
int previousMatch = -1;
@@ -156,21 +162,21 @@ public class CSVConfigGuesser {
if (last != lines[j].charAt(i)) {
charMatches = false;
break;
- }
+ }
}
if (charMatches) {
if (previousMatch == -1) {
previousMatch = 0;
}
CSVField field = new CSVField();
- field.setName("field"+config.getFields().length+1);
- field.setSize((i-previousMatch));
+ field.setName("field" + config.getFields().length + 1);
+ field.setSize((i - previousMatch));
config.addField(field);
}
}
}
+
/**
- *
* @return if the field uses a field header. Defaults to false.
*/
public boolean hasFieldHeader() {
@@ -179,11 +185,12 @@ public class CSVConfigGuesser {
/**
* Specify if the CSV file has a field header
+ *
* @param hasFieldHeader true or false
*/
public void setHasFieldHeader(boolean hasFieldHeader) {
this.hasFieldHeader = hasFieldHeader;
}
-
-
+
+
}
diff --git a/src/java/org/apache/commons/csv/writer/CSVField.java b/src/java/org/apache/commons/csv/writer/CSVField.java
index 3b67a42d..45936675 100644
--- a/src/java/org/apache/commons/csv/writer/CSVField.java
+++ b/src/java/org/apache/commons/csv/writer/CSVField.java
@@ -20,7 +20,6 @@ package org.apache.commons.csv.writer;
/**
- *
* @author Martin van den Bemt
* @version $Id: $
*/
@@ -32,7 +31,7 @@ public class CSVField {
private boolean overrideFill;
/**
- *
+ *
*/
public CSVField() {
}
@@ -59,9 +58,10 @@ public class CSVField {
public String getName() {
return name;
}
-
+
/**
* Set the name of the field
+ *
* @param name the name
*/
public void setName(String name) {
@@ -69,7 +69,6 @@ public class CSVField {
}
/**
- *
* @return the size of the field
*/
public int getSize() {
@@ -79,6 +78,7 @@ public class CSVField {
/**
* Set the size of the field.
* The size will be ignored when fixedwidth is set to false in the CSVConfig
+ *
* @param size the size of the field.
*/
public void setSize(int size) {
@@ -94,16 +94,17 @@ public class CSVField {
/**
* Sets overrideFill to true.
+ *
* @param fill the file pattern
*/
public void setFill(int fill) {
overrideFill = true;
this.fill = fill;
}
-
+
/**
* Does this field override fill ?
- *
+ *
* @return
*/
public boolean overrideFill() {
diff --git a/src/java/org/apache/commons/csv/writer/CSVWriter.java b/src/java/org/apache/commons/csv/writer/CSVWriter.java
index 5ab7dfc6..ce79ca16 100644
--- a/src/java/org/apache/commons/csv/writer/CSVWriter.java
+++ b/src/java/org/apache/commons/csv/writer/CSVWriter.java
@@ -31,16 +31,21 @@ import java.util.Map;
*/
public class CSVWriter {
- /** The CSV config **/
- private CSVConfig config;
- /** The writer **/
- private Writer writer;
/**
- *
+ * The CSV config
+ */
+ private CSVConfig config;
+ /**
+ * The writer
+ */
+ private Writer writer;
+
+ /**
+ *
*/
public CSVWriter() {
}
-
+
public CSVWriter(CSVConfig config) {
setConfig(config);
}
@@ -56,12 +61,12 @@ public class CSVWriter {
value = writeValue(fields[i], value);
sb.append(value);
}
- if (!config.isDelimiterIgnored() && fields.length != (i+1)) {
+ if (!config.isDelimiterIgnored() && fields.length != (i + 1)) {
sb.append(config.getDelimiter());
}
}
if (config.isEndTrimmed()) {
- for (int i = sb.length()-1; i >= 0; i--) {
+ for (int i = sb.length() - 1; i >= 0; i--) {
System.out.println("i : " + i);
if (Character.isWhitespace(sb.charAt(i))) {
sb.deleteCharAt(i);
@@ -73,11 +78,11 @@ public class CSVWriter {
sb.append(config.getRowDelimiter());
String line = sb.toString();
writer.write(line);
- } catch(Exception e) {
+ } catch (Exception e) {
e.printStackTrace();
}
}
-
+
protected String writeValue(CSVField field, String value) throws Exception {
if (config.isFixedWidth()) {
if (value.length() < field.getSize()) {
@@ -106,11 +111,11 @@ public class CSVWriter {
}
if (!config.isValueDelimiterIgnored()) {
// add the value delimiter..
- value = config.getValueDelimiter()+value+config.getValueDelimiter();
+ value = config.getValueDelimiter() + value + config.getValueDelimiter();
}
return value;
}
-
+
/**
* @return the CVSConfig or null if not present
*/
@@ -120,14 +125,16 @@ public class CSVWriter {
/**
* Set the CSVConfig
+ *
* @param config the CVSConfig
*/
public void setConfig(CSVConfig config) {
this.config = config;
}
-
+
/**
* Set the writer to write the CSV file to.
+ *
* @param writer the writer.
*/
public void setWriter(Writer writer) {
diff --git a/src/test/org/apache/commons/csv/CSVParserTest.java b/src/test/org/apache/commons/csv/CSVParserTest.java
index 8212c2b3..ecee9c7e 100644
--- a/src/test/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/org/apache/commons/csv/CSVParserTest.java
@@ -27,569 +27,573 @@ import junit.framework.TestCase;
* CSVParserTest
*
* The test are organized in three different sections:
- * The 'setter/getter' section, the lexer section and finally the parser
- * section. In case a test fails, you should follow a top-down approach for
+ * The 'setter/getter' section, the lexer section and finally the parser
+ * section. In case a test fails, you should follow a top-down approach for
* fixing a potential bug (its likely that the parser itself fails if the lexer
* has problems...).
*/
public class CSVParserTest extends TestCase {
-
- /**
- * TestCSVParser.
- */
- class TestCSVParser extends CSVParser {
+
/**
- * Test parser to investigate the type of the internal Token.
- * @param in a Reader
+ * TestCSVParser.
*/
- TestCSVParser(Reader in) {
- super(in);
+ class TestCSVParser extends CSVParser {
+ /**
+ * Test parser to investigate the type of the internal Token.
+ *
+ * @param in a Reader
+ */
+ TestCSVParser(Reader in) {
+ super(in);
+ }
+
+ TestCSVParser(Reader in, CSVStrategy strategy) {
+ super(in, strategy);
+ }
+
+ /**
+ * Calls super.nextToken() and prints out a String representation of token
+ * type and content.
+ *
+ * @return String representation of token type and content
+ * @throws IOException like {@link CSVParser#nextToken()}
+ */
+ public String testNextToken() throws IOException {
+ Token t = super.nextToken();
+ return Integer.toString(t.type) + ";" + t.content + ";";
+ }
}
- TestCSVParser(Reader in, CSVStrategy strategy) {
- super(in, strategy);
+ // ======================================================
+ // lexer tests
+ // ======================================================
+
+ // Single line (without comment)
+ public void testNextToken1() throws IOException {
+ String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
+ TestCSVParser parser = new TestCSVParser(new StringReader(code));
+ assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";lmnop;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";qrst;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";uv;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";wxy;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";z;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
}
- /**
- * Calls super.nextToken() and prints out a String representation of token
- * type and content.
- * @return String representation of token type and content
- * @throws IOException like {@link CSVParser#nextToken()}
- */
- public String testNextToken() throws IOException {
- Token t = super.nextToken();
- return Integer.toString(t.type) + ";" + t.content + ";";
+
+ // multiline including comments (and empty lines)
+ public void testNextToken2() throws IOException {
+ /* file: 1,2,3,
+ * a,b x,c
+ *
+ * # this is a comment
+ * d,e,
+ *
+ */
+ String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
+ CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
+ // strategy.setIgnoreEmptyLines(false);
+ strategy.setCommentStart('#');
+
+ TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
+
+
+ assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";b x;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EORECORD + ";c;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";d;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";e;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
+
}
- }
-
- // ======================================================
- // lexer tests
- // ======================================================
-
- // Single line (without comment)
- public void testNextToken1() throws IOException {
- String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
- TestCSVParser parser = new TestCSVParser(new StringReader(code));
- assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";lmnop;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";qrst;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";uv;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";wxy;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";z;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
- assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
- }
-
- // multiline including comments (and empty lines)
- public void testNextToken2() throws IOException {
- /* file: 1,2,3,
- * a,b x,c
- *
- * # this is a comment
- * d,e,
- *
- */
- String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
- CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
- // strategy.setIgnoreEmptyLines(false);
- strategy.setCommentStart('#');
- TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
+ // simple token with escaping
+ public void testNextToken3() throws IOException {
+ /* file: a,\,,b
+ * \,,
+ */
+ String code = "a,\\,,b\n\\,,";
+ CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
+ strategy.setCommentStart('#');
+ TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
+ assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
+ // an unquoted single backslash is not an escape char
+ assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
+ // an unquoted single backslash is not an escape char
+ assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
+ }
- assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken());
- assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";b x;", parser.testNextToken());
- assertEquals(CSVParser.TT_EORECORD + ";c;", parser.testNextToken());
- assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";d;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";e;", parser.testNextToken());
- assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken());
- assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
- assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
-
- }
-
- // simple token with escaping
- public void testNextToken3() throws IOException {
- /* file: a,\,,b
- * \,,
- */
- String code = "a,\\,,b\n\\,,";
- CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
- strategy.setCommentStart('#');
- TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
-
- assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
- // an unquoted single backslash is not an escape char
- assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
- assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
- // an unquoted single backslash is not an escape char
- assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
- assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken());
- }
-
- // encapsulator tokenizer (sinle line)
- public void testNextToken4() throws IOException {
- /* file: a,"foo",b
- * a, " foo",b
- * a,"foo " ,b // whitespace after closing encapsulator
- * a, " foo " ,b
- */
- String code =
- "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
- TestCSVParser parser = new TestCSVParser(new StringReader(code));
- assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
- assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + "; foo;", parser.testNextToken());
- assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";foo ;", parser.testNextToken());
- assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + "; foo ;", parser.testNextToken());
+ // encapsulator tokenizer (single line)
+ public void testNextToken4() throws IOException {
+ /* file: a,"foo",b
+ * a, " foo",b
+ * a,"foo " ,b // whitespace after closing encapsulator
+ * a, " foo " ,b
+ */
+ String code =
+ "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
+ TestCSVParser parser = new TestCSVParser(new StringReader(code));
+ assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + "; foo;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";foo ;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + "; foo ;", parser.testNextToken());
// assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
- assertEquals(CSVParser.TT_EOF + ";b;", parser.testNextToken());
- }
-
- // encapsulator tokenizer (multi line, delimiter in string)
- public void testNextToken5() throws IOException {
- String code =
- "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";
- TestCSVParser parser = new TestCSVParser(new StringReader(code));
- assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
- assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
- assertEquals(CSVParser.TT_EORECORD + ";foo\n baar ,,,;",
- parser.testNextToken());
- assertEquals(CSVParser.TT_EOF + ";\n\t \n;", parser.testNextToken());
-
- }
-
- // change delimiters, comment, encapsulater
- public void testNextToken6() throws IOException {
- /* file: a;'b and \' more
- * '
- * !comment;;;;
- * ;;
- */
- String code = "a;'b and '' more\n'\n!comment;;;;\n;;";
- TestCSVParser parser = new TestCSVParser(new StringReader(code), new CSVStrategy(';', '\'', '!'));
- assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
- assertEquals(
- CSVParser.TT_EORECORD + ";b and ' more\n;",
- parser.testNextToken());
- }
-
-
- // ======================================================
- // parser tests
- // ======================================================
-
- String code =
- "a,b,c,d\n"
- + " a , b , 1 2 \n"
- + "\"foo baar\", b,\n"
- // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
- + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping
- String[][] res = {
- {"a", "b", "c", "d"},
- {"a", "b", "1 2"},
- {"foo baar", "b", ""},
- {"foo\n,,\n\",,\n\"", "d", "e"}
- };
- public void testGetLine() throws IOException {
- CSVParser parser = new CSVParser(new StringReader(code));
- String[] tmp = null;
- for (int i = 0; i < res.length; i++) {
- tmp = parser.getLine();
- assertTrue(Arrays.equals(res[i], tmp));
+ assertEquals(CSVParser.TT_EOF + ";b;", parser.testNextToken());
}
- tmp = parser.getLine();
- assertTrue(tmp == null);
- }
-
- public void testNextValue() throws IOException {
- CSVParser parser = new CSVParser(new StringReader(code));
- String tmp = null;
- for (int i = 0; i < res.length; i++) {
- for (int j = 0; j < res[i].length; j++) {
+
+ // encapsulator tokenizer (multi line, delimiter in string)
+ public void testNextToken5() throws IOException {
+ String code =
+ "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";
+ TestCSVParser parser = new TestCSVParser(new StringReader(code));
+ assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EORECORD + ";foo\n baar ,,,;",
+ parser.testNextToken());
+ assertEquals(CSVParser.TT_EOF + ";\n\t \n;", parser.testNextToken());
+
+ }
+
+ // change delimiters, comment, encapsulator
+ public void testNextToken6() throws IOException {
+ /* file: a;'b and '' more
+ * '
+ * !comment;;;;
+ * ;;
+ */
+ String code = "a;'b and '' more\n'\n!comment;;;;\n;;";
+ TestCSVParser parser = new TestCSVParser(new StringReader(code), new CSVStrategy(';', '\'', '!'));
+ assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
+ assertEquals(
+ CSVParser.TT_EORECORD + ";b and ' more\n;",
+ parser.testNextToken());
+ }
+
+
+ // ======================================================
+ // parser tests
+ // ======================================================
+
+ String code =
+ "a,b,c,d\n"
+ + " a , b , 1 2 \n"
+ + "\"foo baar\", b,\n"
+ // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n";
+ + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping
+ String[][] res = {
+ {"a", "b", "c", "d"},
+ {"a", "b", "1 2"},
+ {"foo baar", "b", ""},
+ {"foo\n,,\n\",,\n\"", "d", "e"}
+ };
+
+ public void testGetLine() throws IOException {
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String[] tmp = null;
+ for (int i = 0; i < res.length; i++) {
+ tmp = parser.getLine();
+ assertTrue(Arrays.equals(res[i], tmp));
+ }
+ tmp = parser.getLine();
+ assertTrue(tmp == null);
+ }
+
+ public void testNextValue() throws IOException {
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String tmp = null;
+ for (int i = 0; i < res.length; i++) {
+ for (int j = 0; j < res[i].length; j++) {
+ tmp = parser.nextValue();
+ assertEquals(res[i][j], tmp);
+ }
+ }
tmp = parser.nextValue();
- assertEquals(res[i][j], tmp);
- }
+ assertTrue(tmp == null);
}
- tmp = parser.nextValue();
- assertTrue(tmp == null);
- }
-
- public void testGetAllValues() throws IOException {
- CSVParser parser = new CSVParser(new StringReader(code));
- String[][] tmp = parser.getAllValues();
- assertEquals(res.length, tmp.length);
- assertTrue(tmp.length > 0);
- for (int i = 0; i < res.length; i++) {
- assertTrue(Arrays.equals(res[i], tmp[i]));
+
+ public void testGetAllValues() throws IOException {
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String[][] tmp = parser.getAllValues();
+ assertEquals(res.length, tmp.length);
+ assertTrue(tmp.length > 0);
+ for (int i = 0; i < res.length; i++) {
+ assertTrue(Arrays.equals(res[i], tmp[i]));
+ }
}
- }
-
- public void testExcelStrategy1() throws IOException {
- String code =
- "value1,value2,value3,value4\r\na,b,c,d\r\n x,,,"
- + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n";
- String[][] res = {
- {"value1", "value2", "value3", "value4"},
- {"a", "b", "c", "d"},
- {" x", "", "", ""},
- {""},
- {"\"hello\"", " \"world\"", "abc\ndef", ""}
- };
- CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
- String[][] tmp = parser.getAllValues();
- assertEquals(res.length, tmp.length);
- assertTrue(tmp.length > 0);
- for (int i = 0; i < res.length; i++) {
- assertTrue(Arrays.equals(res[i], tmp[i]));
- }
- }
-
- public void testExcelStrategy2() throws Exception {
- String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
- String[][] res = {
- {"foo", "baar"},
- {""},
- {"hello", ""},
- {""},
- {"world", ""}
- };
- CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
- String[][] tmp = parser.getAllValues();
- assertEquals(res.length, tmp.length);
- assertTrue(tmp.length > 0);
- for (int i = 0; i < res.length; i++) {
- assertTrue(Arrays.equals(res[i], tmp[i]));
- }
- }
-
- public void testEndOfFileBehaviourExcel() throws Exception {
- String[] codes = {
- "hello,\r\n\r\nworld,\r\n",
- "hello,\r\n\r\nworld,",
- "hello,\r\n\r\nworld,\"\"\r\n",
- "hello,\r\n\r\nworld,\"\"",
- "hello,\r\n\r\nworld,\n",
- "hello,\r\n\r\nworld,",
- "hello,\r\n\r\nworld,\"\"\n",
- "hello,\r\n\r\nworld,\"\""
+
+ public void testExcelStrategy1() throws IOException {
+ String code =
+ "value1,value2,value3,value4\r\na,b,c,d\r\n x,,,"
+ + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n";
+ String[][] res = {
+ {"value1", "value2", "value3", "value4"},
+ {"a", "b", "c", "d"},
+ {" x", "", "", ""},
+ {""},
+ {"\"hello\"", " \"world\"", "abc\ndef", ""}
};
- String[][] res = {
- {"hello", ""},
- {""}, // ExcelStrategy does not ignore empty lines
- {"world", ""}
- };
- String code;
- for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
- code = codes[codeIndex];
- CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
- String[][] tmp = parser.getAllValues();
- assertEquals(res.length, tmp.length);
- assertTrue(tmp.length > 0);
- for (int i = 0; i < res.length; i++) {
- assertTrue(Arrays.equals(res[i], tmp[i]));
- }
+ CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
+ String[][] tmp = parser.getAllValues();
+ assertEquals(res.length, tmp.length);
+ assertTrue(tmp.length > 0);
+ for (int i = 0; i < res.length; i++) {
+ assertTrue(Arrays.equals(res[i], tmp[i]));
+ }
}
- }
-
- public void testEndOfFileBehaviorCSV() throws Exception {
- String[] codes = {
- "hello,\r\n\r\nworld,\r\n",
- "hello,\r\n\r\nworld,",
- "hello,\r\n\r\nworld,\"\"\r\n",
- "hello,\r\n\r\nworld,\"\"",
- "hello,\r\n\r\nworld,\n",
- "hello,\r\n\r\nworld,",
- "hello,\r\n\r\nworld,\"\"\n",
- "hello,\r\n\r\nworld,\"\""
+
+ public void testExcelStrategy2() throws Exception {
+ String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n";
+ String[][] res = {
+ {"foo", "baar"},
+ {""},
+ {"hello", ""},
+ {""},
+ {"world", ""}
};
- String[][] res = {
- {"hello", ""}, // CSV Strategy ignores empty lines
- {"world", ""}
- };
- String code;
- for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
- code = codes[codeIndex];
- CSVParser parser = new CSVParser(new StringReader(code));
- String[][] tmp = parser.getAllValues();
- assertEquals(res.length, tmp.length);
- assertTrue(tmp.length > 0);
- for (int i = 0; i < res.length; i++) {
- assertTrue(Arrays.equals(res[i], tmp[i]));
- }
+ CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
+ String[][] tmp = parser.getAllValues();
+ assertEquals(res.length, tmp.length);
+ assertTrue(tmp.length > 0);
+ for (int i = 0; i < res.length; i++) {
+ assertTrue(Arrays.equals(res[i], tmp[i]));
+ }
}
- }
-
- public void testEmptyLineBehaviourExcel() throws Exception {
- String[] codes = {
- "hello,\r\n\r\n\r\n",
- "hello,\n\n\n",
- "hello,\"\"\r\n\r\n\r\n",
- "hello,\"\"\n\n\n"
+
+ public void testEndOfFileBehaviourExcel() throws Exception {
+ String[] codes = {
+ "hello,\r\n\r\nworld,\r\n",
+ "hello,\r\n\r\nworld,",
+ "hello,\r\n\r\nworld,\"\"\r\n",
+ "hello,\r\n\r\nworld,\"\"",
+ "hello,\r\n\r\nworld,\n",
+ "hello,\r\n\r\nworld,",
+ "hello,\r\n\r\nworld,\"\"\n",
+ "hello,\r\n\r\nworld,\"\""
};
- String[][] res = {
- {"hello", ""},
- {""}, // ExcelStrategy does not ignore empty lines
- {""}
- };
- String code;
- for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
- code = codes[codeIndex];
- CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
- String[][] tmp = parser.getAllValues();
- assertEquals(res.length, tmp.length);
- assertTrue(tmp.length > 0);
- for (int i = 0; i < res.length; i++) {
- assertTrue(Arrays.equals(res[i], tmp[i]));
- }
- }
- }
-
- public void testEmptyLineBehaviourCSV() throws Exception {
- String[] codes = {
- "hello,\r\n\r\n\r\n",
- "hello,\n\n\n",
- "hello,\"\"\r\n\r\n\r\n",
- "hello,\"\"\n\n\n"
+ String[][] res = {
+ {"hello", ""},
+ {""}, // ExcelStrategy does not ignore empty lines
+ {"world", ""}
};
- String[][] res = {
- {"hello", ""} // CSV Strategy ignores empty lines
- };
- String code;
- for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
- code = codes[codeIndex];
- CSVParser parser = new CSVParser(new StringReader(code));
- String[][] tmp = parser.getAllValues();
- assertEquals(res.length, tmp.length);
- assertTrue(tmp.length > 0);
- for (int i = 0; i < res.length; i++) {
- assertTrue(Arrays.equals(res[i], tmp[i]));
- }
- }
- }
-
- public void OLDtestBackslashEscaping() throws IOException {
- String code =
- "one,two,three\n"
- + "on\\\"e,two\n"
- + "on\"e,two\n"
- + "one,\"tw\\\"o\"\n"
- + "one,\"t\\,wo\"\n"
- + "one,two,\"th,ree\"\n"
- + "\"a\\\\\"\n"
- + "a\\,b\n"
- + "\"a\\\\,b\"";
- String[][] res = {
- { "one", "two", "three" },
- { "on\\\"e", "two" },
- { "on\"e", "two" },
- { "one", "tw\"o" },
- { "one", "t\\,wo" }, // backslash in quotes only escapes a delimiter (",")
- { "one", "two", "th,ree" },
- { "a\\\\" }, // backslash in quotes only escapes a delimiter (",")
- { "a\\", "b" }, // a backslash must be returnd
- { "a\\\\,b" } // backslash in quotes only escapes a delimiter (",")
- };
- CSVParser parser = new CSVParser(new StringReader(code));
- String[][] tmp = parser.getAllValues();
- assertEquals(res.length, tmp.length);
- assertTrue(tmp.length > 0);
- for (int i = 0; i < res.length; i++) {
- assertTrue(Arrays.equals(res[i], tmp[i]));
- }
- }
-
- public void testBackslashEscaping() throws IOException {
-
- // To avoid confusion over the need for escaping chars in java code,
- // We will test with a forward slash as the escape char, and a single
- // quote as the encapsulator.
-
- String code =
- "one,two,three\n" // 0
- + "'',''\n" // 1) empty encapsulators
- + "/',/'\n" // 2) single encapsulators
- + "'/'','/''\n" // 3) single encapsulators encapsulated via escape
- + "'''',''''\n" // 4) single encapsulators encapsulated via doubling
- + "/,,/,\n" // 5) separator escaped
- + "//,//\n" // 6) escape escaped
- + "'//','//'\n" // 7) escape escaped in encapsulation
- + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces
- + "9, /\n \n" // escaped newline
- + "";
- String[][] res = {
- { "one", "two", "three" }, // 0
- { "", "" }, // 1
- { "'", "'" }, // 2
- { "'", "'" }, // 3
- { "'", "'" }, // 4
- { ",", "," }, // 5
- { "/", "/" }, // 6
- { "/", "/" }, // 7
- { " 8 ", " \"quoted \"\" \" / string\" " },
- { "9", " \n " },
- };
-
-
- CSVStrategy strategy = new CSVStrategy(',','\'',CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true);
-
- CSVParser parser = new CSVParser(new StringReader(code), strategy);
- String[][] tmp = parser.getAllValues();
- assertTrue(tmp.length > 0);
- for (int i = 0; i < res.length; i++) {
- assertTrue(Arrays.equals(res[i], tmp[i]));
- }
- }
-
- public void testBackslashEscaping2() throws IOException {
-
- // To avoid confusion over the need for escaping chars in java code,
- // We will test with a forward slash as the escape char, and a single
- // quote as the encapsulator.
-
- String code = ""
- + " , , \n" // 1)
- + " \t , , \n" // 2)
- + " // , /, , /,\n" // 3)
- + "";
- String[][] res = {
- { " ", " ", " " }, // 1
- { " \t ", " ", " " }, // 2
- { " / ", " , ", " ," }, //3
- };
-
-
- CSVStrategy strategy = new CSVStrategy(',',CSVStrategy.ENCAPSULATOR_DISABLED,CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true);
-
- CSVParser parser = new CSVParser(new StringReader(code), strategy);
- String[][] tmp = parser.getAllValues();
- assertTrue(tmp.length > 0);
-
- if (!CSVPrinterTest.equals(res, tmp)) {
- assertTrue(false);
+ String code;
+ for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
+ code = codes[codeIndex];
+ CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
+ String[][] tmp = parser.getAllValues();
+ assertEquals(res.length, tmp.length);
+ assertTrue(tmp.length > 0);
+ for (int i = 0; i < res.length; i++) {
+ assertTrue(Arrays.equals(res[i], tmp[i]));
+ }
+ }
}
- }
-
-
- public void testDefaultStrategy() throws IOException {
-
- String code = ""
- + "a,b\n" // 1)
- + "\"\n\",\" \"\n" // 2)
- + "\"\",#\n" // 2)
- ;
- String[][] res = {
- { "a", "b" },
- { "\n", " " },
- { "", "#" },
- };
-
- CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
- assertEquals(CSVStrategy.COMMENTS_DISABLED, strategy.getCommentStart());
-
- CSVParser parser = new CSVParser(new StringReader(code), strategy);
- String[][] tmp = parser.getAllValues();
- assertTrue(tmp.length > 0);
-
- if (!CSVPrinterTest.equals(res, tmp)) {
- assertTrue(false);
+ public void testEndOfFileBehaviorCSV() throws Exception {
+ String[] codes = {
+ "hello,\r\n\r\nworld,\r\n",
+ "hello,\r\n\r\nworld,",
+ "hello,\r\n\r\nworld,\"\"\r\n",
+ "hello,\r\n\r\nworld,\"\"",
+ "hello,\r\n\r\nworld,\n",
+ "hello,\r\n\r\nworld,",
+ "hello,\r\n\r\nworld,\"\"\n",
+ "hello,\r\n\r\nworld,\"\""
+ };
+ String[][] res = {
+ {"hello", ""}, // CSV Strategy ignores empty lines
+ {"world", ""}
+ };
+ String code;
+ for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
+ code = codes[codeIndex];
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String[][] tmp = parser.getAllValues();
+ assertEquals(res.length, tmp.length);
+ assertTrue(tmp.length > 0);
+ for (int i = 0; i < res.length; i++) {
+ assertTrue(Arrays.equals(res[i], tmp[i]));
+ }
+ }
}
- String[][] res_comments = {
- { "a", "b" },
- { "\n", " " },
- { ""},
- };
-
- strategy = new CSVStrategy(',','"','#');
- parser = new CSVParser(new StringReader(code), strategy);
- tmp = parser.getAllValues();
-
- if (!CSVPrinterTest.equals(res_comments, tmp)) {
- assertTrue(false);
+ public void testEmptyLineBehaviourExcel() throws Exception {
+ String[] codes = {
+ "hello,\r\n\r\n\r\n",
+ "hello,\n\n\n",
+ "hello,\"\"\r\n\r\n\r\n",
+ "hello,\"\"\n\n\n"
+ };
+ String[][] res = {
+ {"hello", ""},
+ {""}, // ExcelStrategy does not ignore empty lines
+ {""}
+ };
+ String code;
+ for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
+ code = codes[codeIndex];
+ CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
+ String[][] tmp = parser.getAllValues();
+ assertEquals(res.length, tmp.length);
+ assertTrue(tmp.length > 0);
+ for (int i = 0; i < res.length; i++) {
+ assertTrue(Arrays.equals(res[i], tmp[i]));
+ }
+ }
+ }
+
+ public void testEmptyLineBehaviourCSV() throws Exception {
+ String[] codes = {
+ "hello,\r\n\r\n\r\n",
+ "hello,\n\n\n",
+ "hello,\"\"\r\n\r\n\r\n",
+ "hello,\"\"\n\n\n"
+ };
+ String[][] res = {
+ {"hello", ""} // CSV Strategy ignores empty lines
+ };
+ String code;
+ for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
+ code = codes[codeIndex];
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String[][] tmp = parser.getAllValues();
+ assertEquals(res.length, tmp.length);
+ assertTrue(tmp.length > 0);
+ for (int i = 0; i < res.length; i++) {
+ assertTrue(Arrays.equals(res[i], tmp[i]));
+ }
+ }
+ }
+
+ public void OLDtestBackslashEscaping() throws IOException {
+ String code =
+ "one,two,three\n"
+ + "on\\\"e,two\n"
+ + "on\"e,two\n"
+ + "one,\"tw\\\"o\"\n"
+ + "one,\"t\\,wo\"\n"
+ + "one,two,\"th,ree\"\n"
+ + "\"a\\\\\"\n"
+ + "a\\,b\n"
+ + "\"a\\\\,b\"";
+ String[][] res = {
+ {"one", "two", "three"},
+ {"on\\\"e", "two"},
+ {"on\"e", "two"},
+ {"one", "tw\"o"},
+ {"one", "t\\,wo"}, // backslash in quotes only escapes a delimiter (",")
+ {"one", "two", "th,ree"},
+ {"a\\\\"}, // backslash in quotes only escapes a delimiter (",")
+ {"a\\", "b"}, // a backslash must be returned
+ {"a\\\\,b"} // backslash in quotes only escapes a delimiter (",")
+ };
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String[][] tmp = parser.getAllValues();
+ assertEquals(res.length, tmp.length);
+ assertTrue(tmp.length > 0);
+ for (int i = 0; i < res.length; i++) {
+ assertTrue(Arrays.equals(res[i], tmp[i]));
+ }
+ }
+
+ public void testBackslashEscaping() throws IOException {
+
+ // To avoid confusion over the need for escaping chars in java code,
+ // We will test with a forward slash as the escape char, and a single
+ // quote as the encapsulator.
+
+ String code =
+ "one,two,three\n" // 0
+ + "'',''\n" // 1) empty encapsulators
+ + "/',/'\n" // 2) single encapsulators
+ + "'/'','/''\n" // 3) single encapsulators encapsulated via escape
+ + "'''',''''\n" // 4) single encapsulators encapsulated via doubling
+ + "/,,/,\n" // 5) separator escaped
+ + "//,//\n" // 6) escape escaped
+ + "'//','//'\n" // 7) escape escaped in encapsulation
+ + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces
+ + "9, /\n \n" // escaped newline
+ + "";
+ String[][] res = {
+ {"one", "two", "three"}, // 0
+ {"", ""}, // 1
+ {"'", "'"}, // 2
+ {"'", "'"}, // 3
+ {"'", "'"}, // 4
+ {",", ","}, // 5
+ {"/", "/"}, // 6
+ {"/", "/"}, // 7
+ {" 8 ", " \"quoted \"\" \" / string\" "},
+ {"9", " \n "},
+ };
+
+
+ CSVStrategy strategy = new CSVStrategy(',', '\'', CSVStrategy.COMMENTS_DISABLED, '/', false, false, true, true);
+
+ CSVParser parser = new CSVParser(new StringReader(code), strategy);
+ String[][] tmp = parser.getAllValues();
+ assertTrue(tmp.length > 0);
+ for (int i = 0; i < res.length; i++) {
+ assertTrue(Arrays.equals(res[i], tmp[i]));
+ }
+ }
+
+ public void testBackslashEscaping2() throws IOException {
+
+ // To avoid confusion over the need for escaping chars in java code,
+ // We will test with a forward slash as the escape char, and with
+ // the encapsulator disabled.
+
+ String code = ""
+ + " , , \n" // 1)
+ + " \t , , \n" // 2)
+ + " // , /, , /,\n" // 3)
+ + "";
+ String[][] res = {
+ {" ", " ", " "}, // 1
+ {" \t ", " ", " "}, // 2
+ {" / ", " , ", " ,"}, //3
+ };
+
+
+ CSVStrategy strategy = new CSVStrategy(',', CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED, '/', false, false, true, true);
+
+ CSVParser parser = new CSVParser(new StringReader(code), strategy);
+ String[][] tmp = parser.getAllValues();
+ assertTrue(tmp.length > 0);
+
+ if (!CSVPrinterTest.equals(res, tmp)) {
+ assertTrue(false);
+ }
+
+ }
+
+
+ public void testDefaultStrategy() throws IOException {
+
+ String code = ""
+ + "a,b\n" // 1)
+ + "\"\n\",\" \"\n" // 2)
+ + "\"\",#\n" // 3)
+ ;
+ String[][] res = {
+ {"a", "b"},
+ {"\n", " "},
+ {"", "#"},
+ };
+
+ CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
+ assertEquals(CSVStrategy.COMMENTS_DISABLED, strategy.getCommentStart());
+
+ CSVParser parser = new CSVParser(new StringReader(code), strategy);
+ String[][] tmp = parser.getAllValues();
+ assertTrue(tmp.length > 0);
+
+ if (!CSVPrinterTest.equals(res, tmp)) {
+ assertTrue(false);
+ }
+
+ String[][] res_comments = {
+ {"a", "b"},
+ {"\n", " "},
+ {""},
+ };
+
+ strategy = new CSVStrategy(',', '"', '#');
+ parser = new CSVParser(new StringReader(code), strategy);
+ tmp = parser.getAllValues();
+
+ if (!CSVPrinterTest.equals(res_comments, tmp)) {
+ assertTrue(false);
+ }
}
- }
public void testUnicodeEscape() throws IOException {
- String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
- CSVParser parser = new CSVParser(new StringReader(code));
- parser.getStrategy().setUnicodeEscapeInterpretation(true);
- String[] data = parser.getLine();
- assertEquals(2, data.length);
- assertEquals("abc", data[0]);
- assertEquals("public", data[1]);
+ String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
+ CSVParser parser = new CSVParser(new StringReader(code));
+ parser.getStrategy().setUnicodeEscapeInterpretation(true);
+ String[] data = parser.getLine();
+ assertEquals(2, data.length);
+ assertEquals("abc", data[0]);
+ assertEquals("public", data[1]);
}
-
+
public void testCarriageReturnLineFeedEndings() throws IOException {
- String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
- CSVParser parser = new CSVParser(new StringReader(code));
- String[][] data = parser.getAllValues();
- assertEquals(4, data.length);
+ String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String[][] data = parser.getAllValues();
+ assertEquals(4, data.length);
}
public void testCarriageReturnEndings() throws IOException {
- String code = "foo\rbaar,\rhello,world\r,kanu";
- CSVParser parser = new CSVParser(new StringReader(code));
- String[][] data = parser.getAllValues();
- assertEquals(4, data.length);
+ String code = "foo\rbaar,\rhello,world\r,kanu";
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String[][] data = parser.getAllValues();
+ assertEquals(4, data.length);
}
public void testLineFeedEndings() throws IOException {
- String code = "foo\nbaar,\nhello,world\n,kanu";
- CSVParser parser = new CSVParser(new StringReader(code));
- String[][] data = parser.getAllValues();
- assertEquals(4, data.length);
+ String code = "foo\nbaar,\nhello,world\n,kanu";
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String[][] data = parser.getAllValues();
+ assertEquals(4, data.length);
}
public void testIgnoreEmptyLines() throws IOException {
- String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
- //String code = "world\r\n\n";
- //String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n";
- CSVParser parser = new CSVParser(new StringReader(code));
- String[][] data = parser.getAllValues();
- assertEquals(3, data.length);
+ String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
+ //String code = "world\r\n\n";
+ //String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n";
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String[][] data = parser.getAllValues();
+ assertEquals(3, data.length);
}
-
+
public void testLineTokenConsistency() throws IOException {
- String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
- CSVParser parser = new CSVParser(new StringReader(code));
- String[][] data = parser.getAllValues();
- parser = new CSVParser(new StringReader(code));
- CSVParser parser1 = new CSVParser(new StringReader(code));
- for (int i = 0; i < data.length; i++) {
- assertTrue(Arrays.equals(parser1.getLine(), data[i]));
- for (int j = 0; j < data[i].length; j++) {
- assertEquals(parser.nextValue(), data[i][j]);
+ String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n";
+ CSVParser parser = new CSVParser(new StringReader(code));
+ String[][] data = parser.getAllValues();
+ parser = new CSVParser(new StringReader(code));
+ CSVParser parser1 = new CSVParser(new StringReader(code));
+ for (int i = 0; i < data.length; i++) {
+ assertTrue(Arrays.equals(parser1.getLine(), data[i]));
+ for (int j = 0; j < data[i].length; j++) {
+ assertEquals(parser.nextValue(), data[i][j]);
+ }
}
- }
}
// From SANDBOX-153
- public void testDelimiterIsWhitespace() throws IOException {
- String code = "one\ttwo\t\tfour \t five\t six";
- TestCSVParser parser = new TestCSVParser(new StringReader(code), CSVStrategy.TDF_STRATEGY);
- assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";four;", parser.testNextToken());
- assertEquals(CSVParser.TT_TOKEN + ";five;", parser.testNextToken());
- assertEquals(CSVParser.TT_EOF + ";six;", parser.testNextToken());
- }
+ public void testDelimiterIsWhitespace() throws IOException {
+ String code = "one\ttwo\t\tfour \t five\t six";
+ TestCSVParser parser = new TestCSVParser(new StringReader(code), CSVStrategy.TDF_STRATEGY);
+ assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";four;", parser.testNextToken());
+ assertEquals(CSVParser.TT_TOKEN + ";five;", parser.testNextToken());
+ assertEquals(CSVParser.TT_EOF + ";six;", parser.testNextToken());
+ }
}
diff --git a/src/test/org/apache/commons/csv/CSVPrinterTest.java b/src/test/org/apache/commons/csv/CSVPrinterTest.java
index 2b15aced..cc4ec9fd 100644
--- a/src/test/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/org/apache/commons/csv/CSVPrinterTest.java
@@ -30,200 +30,219 @@ import junit.framework.TestSuite;
* CSVPrinterTest
*/
public class CSVPrinterTest extends TestCase {
-
- String lineSeparator = "\n";
- public void testPrinter1() throws IOException {
- StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
- String[] line1 = {"a", "b"};
- printer.println(line1);
- assertEquals("a,b" + lineSeparator, sw.toString());
- }
+ String lineSeparator = "\n";
- public void testPrinter2() throws IOException {
- StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
- String[] line1 = {"a,b", "b"};
- printer.println(line1);
- assertEquals("\"a,b\",b" + lineSeparator, sw.toString());
- }
-
- public void testPrinter3() throws IOException {
- StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
- String[] line1 = {"a, b", "b "};
- printer.println(line1);
- assertEquals("\"a, b\",\"b \"" + lineSeparator, sw.toString());
- }
-
- public void testPrinter4() throws IOException {
- StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
- String[] line1 = {"a", "b\"c"};
- printer.println(line1);
- assertEquals("a,\"b\"\"c\"" + lineSeparator, sw.toString());
- }
-
- public void testPrinter5() throws IOException {
- StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
- String[] line1 = {"a", "b\nc"};
- printer.println(line1);
- assertEquals("a,\"b\nc\"" + lineSeparator, sw.toString());
- }
-
- public void testPrinter6() throws IOException {
- StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
- String[] line1 = {"a", "b\r\nc"};
- printer.println(line1);
- assertEquals("a,\"b\r\nc\"" + lineSeparator, sw.toString());
- }
-
- public void testPrinter7() throws IOException {
- StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
- String[] line1 = {"a", "b\\c"};
- printer.println(line1);
- assertEquals("a,b\\c" + lineSeparator, sw.toString());
- }
-
- public void testExcelPrinter1() throws IOException {
- StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.EXCEL_STRATEGY);
- String[] line1 = {"a", "b"};
- printer.println(line1);
- assertEquals("a,b" + lineSeparator, sw.toString());
- }
-
- public void testExcelPrinter2() throws IOException {
- StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.EXCEL_STRATEGY);
- String[] line1 = {"a,b", "b"};
- printer.println(line1);
- assertEquals("\"a,b\",b" + lineSeparator, sw.toString());
- }
-
-
-
- public void testRandom() throws Exception {
- int iter=10000;
- strategy = CSVStrategy.DEFAULT_STRATEGY;
- doRandom(iter);
- strategy = CSVStrategy.EXCEL_STRATEGY;
- doRandom(iter);
-
- // Strategy for MySQL
- strategy = new CSVStrategy('\t', CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED,'\\',false, false, false, false);
- doRandom(iter);
- }
-
- Random r = new Random();
- CSVStrategy strategy;
-
- public void doRandom(int iter) throws Exception {
- for (int i=0; i=128) {
- sb.append("(" + (int)ch + ")");
- } else {
- sb.append(ch);
- }
- }
- return sb.toString();
- }
+ public void doOneRandom() throws Exception {
+ int nLines = r.nextInt(4) + 1;
+ int nCol = r.nextInt(3) + 1;
+ // nLines=1;nCol=2;
+ String[][] lines = new String[nLines][];
+ for (int i = 0; i < nLines; i++) {
+ String[] line = new String[nCol];
+ lines[i] = line;
+ for (int j = 0; j < nCol; j++) {
+ line[j] = randStr();
+ }
+ }
- public String randStr() {
- int sz = r.nextInt(20);
- // sz = r.nextInt(3);
- char[] buf = new char[sz];
- for (int i=0; i= 128) {
+ sb.append("(" + (int) ch + ")");
+ } else {
+ sb.append(ch);
+ }
+ }
+ return sb.toString();
+ }
+
+ public String randStr() {
+ int sz = r.nextInt(20);
+ // sz = r.nextInt(3);
+ char[] buf = new char[sz];
+ for (int i = 0; i < sz; i++) {
+ // stick in special chars with greater frequency
+ char ch;
+ int what = r.nextInt(20);
+ switch (what) {
+ case 0:
+ ch = '\r';
+ break;
+ case 1:
+ ch = '\n';
+ break;
+ case 2:
+ ch = '\t';
+ break;
+ case 3:
+ ch = '\f';
+ break;
+ case 4:
+ ch = ' ';
+ break;
+ case 5:
+ ch = ',';
+ break;
+ case 6:
+ ch = '"';
+ break;
+ case 7:
+ ch = '\'';
+ break;
+ case 8:
+ ch = '\\';
+ break;
+ default:
+ ch = (char) r.nextInt(300);
+ break;
+ // default: ch = 'a'; break;
+ }
+ buf[i] = ch;
+ }
+ return new String(buf);
}
- return new String(buf);
- }
}
diff --git a/src/test/org/apache/commons/csv/CSVStrategyTest.java b/src/test/org/apache/commons/csv/CSVStrategyTest.java
index 137bf883..489928c8 100644
--- a/src/test/org/apache/commons/csv/CSVStrategyTest.java
+++ b/src/test/org/apache/commons/csv/CSVStrategyTest.java
@@ -24,68 +24,68 @@ import junit.framework.TestCase;
* CSVStrategyTest
*
* The test are organized in three different sections:
- * The 'setter/getter' section, the lexer section and finally the strategy
- * section. In case a test fails, you should follow a top-down approach for
+ * The 'setter/getter' section, the lexer section and finally the strategy
+ * section. In case a test fails, you should follow a top-down approach for
* fixing a potential bug (its likely that the strategy itself fails if the lexer
* has problems...).
*/
public class CSVStrategyTest extends TestCase {
- // ======================================================
- // getters / setters
- // ======================================================
- public void testGetSetCommentStart() {
- CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
- strategy.setCommentStart('#');
- assertEquals(strategy.getCommentStart(), '#');
- strategy.setCommentStart('!');
- assertEquals(strategy.getCommentStart(), '!');
- }
+ // ======================================================
+ // getters / setters
+ // ======================================================
+ public void testGetSetCommentStart() {
+ CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
+ strategy.setCommentStart('#');
+ assertEquals(strategy.getCommentStart(), '#');
+ strategy.setCommentStart('!');
+ assertEquals(strategy.getCommentStart(), '!');
+ }
- public void testGetSetEncapsulator() {
- CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
- strategy.setEncapsulator('"');
- assertEquals(strategy.getEncapsulator(), '"');
- strategy.setEncapsulator('\'');
- assertEquals(strategy.getEncapsulator(), '\'');
- }
+ public void testGetSetEncapsulator() {
+ CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
+ strategy.setEncapsulator('"');
+ assertEquals(strategy.getEncapsulator(), '"');
+ strategy.setEncapsulator('\'');
+ assertEquals(strategy.getEncapsulator(), '\'');
+ }
- public void testGetSetDelimiter() {
- CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
- strategy.setDelimiter(';');
- assertEquals(strategy.getDelimiter(), ';');
- strategy.setDelimiter(',');
- assertEquals(strategy.getDelimiter(), ',');
- strategy.setDelimiter('\t');
- assertEquals(strategy.getDelimiter(), '\t');
- }
+ public void testGetSetDelimiter() {
+ CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
+ strategy.setDelimiter(';');
+ assertEquals(strategy.getDelimiter(), ';');
+ strategy.setDelimiter(',');
+ assertEquals(strategy.getDelimiter(), ',');
+ strategy.setDelimiter('\t');
+ assertEquals(strategy.getDelimiter(), '\t');
+ }
+
+ public void testSetCSVStrategy() {
+ CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
+ // default settings
+ assertEquals(strategy.getDelimiter(), ',');
+ assertEquals(strategy.getEncapsulator(), '"');
+ assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
+ assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
+ assertEquals(false, strategy.getUnicodeEscapeInterpretation());
+ assertEquals(true, strategy.getIgnoreEmptyLines());
+ // explicit csv settings
+ assertEquals(strategy.getDelimiter(), ',');
+ assertEquals(strategy.getEncapsulator(), '"');
+ assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
+ assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
+ assertEquals(false, strategy.getUnicodeEscapeInterpretation());
+ assertEquals(true, strategy.getIgnoreEmptyLines());
+ }
+
+ public void testSetExcelStrategy() {
+ CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY;
+ assertEquals(strategy.getDelimiter(), ',');
+ assertEquals(strategy.getEncapsulator(), '"');
+ assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
+ assertEquals(false, strategy.getIgnoreLeadingWhitespaces());
+ assertEquals(false, strategy.getUnicodeEscapeInterpretation());
+ assertEquals(false, strategy.getIgnoreEmptyLines());
+ }
- public void testSetCSVStrategy() {
- CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
- // default settings
- assertEquals(strategy.getDelimiter(), ',');
- assertEquals(strategy.getEncapsulator(), '"');
- assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
- assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
- assertEquals(false, strategy.getUnicodeEscapeInterpretation());
- assertEquals(true, strategy.getIgnoreEmptyLines());
- // explicit csv settings
- assertEquals(strategy.getDelimiter(), ',');
- assertEquals(strategy.getEncapsulator(), '"');
- assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
- assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
- assertEquals(false, strategy.getUnicodeEscapeInterpretation());
- assertEquals(true, strategy.getIgnoreEmptyLines());
- }
-
- public void testSetExcelStrategy() {
- CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY;
- assertEquals(strategy.getDelimiter(), ',');
- assertEquals(strategy.getEncapsulator(), '"');
- assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
- assertEquals(false, strategy.getIgnoreLeadingWhitespaces());
- assertEquals(false, strategy.getUnicodeEscapeInterpretation());
- assertEquals(false, strategy.getIgnoreEmptyLines());
- }
-
}
diff --git a/src/test/org/apache/commons/csv/CSVUtilsTest.java b/src/test/org/apache/commons/csv/CSVUtilsTest.java
index c32cefab..94c5a54f 100644
--- a/src/test/org/apache/commons/csv/CSVUtilsTest.java
+++ b/src/test/org/apache/commons/csv/CSVUtilsTest.java
@@ -24,127 +24,127 @@ import junit.framework.TestCase;
* CSVUtilsTest
*/
public class CSVUtilsTest extends TestCase {
-
- // ======================================================
- // static parser tests
- // ======================================================
- public void testParse1() throws IOException {
- String[][] data = CSVUtils.parse("abc\ndef");
- assertEquals(2, data.length);
- assertEquals(1, data[0].length);
- assertEquals(1, data[1].length);
- assertEquals("abc", data[0][0]);
- assertEquals("def", data[1][0]);
+
+ // ======================================================
+ // static parser tests
+ // ======================================================
+ public void testParse1() throws IOException {
+ String[][] data = CSVUtils.parse("abc\ndef");
+ assertEquals(2, data.length);
+ assertEquals(1, data[0].length);
+ assertEquals(1, data[1].length);
+ assertEquals("abc", data[0][0]);
+ assertEquals("def", data[1][0]);
}
public void testParse2() throws IOException {
- String[][] data = CSVUtils.parse("abc,def,\"ghi,jkl\"\ndef");
- assertEquals(2, data.length);
- assertEquals(3, data[0].length);
- assertEquals(1, data[1].length);
- assertEquals("abc", data[0][0]);
- assertEquals("def", data[0][1]);
- assertEquals("ghi,jkl", data[0][2]);
- assertEquals("def", data[1][0]);
+ String[][] data = CSVUtils.parse("abc,def,\"ghi,jkl\"\ndef");
+ assertEquals(2, data.length);
+ assertEquals(3, data[0].length);
+ assertEquals(1, data[1].length);
+ assertEquals("abc", data[0][0]);
+ assertEquals("def", data[0][1]);
+ assertEquals("ghi,jkl", data[0][2]);
+ assertEquals("def", data[1][0]);
}
public void testParse3() throws IOException {
- String[][] data = CSVUtils.parse("abc,\"def\nghi\"\njkl");
- assertEquals(2, data.length);
- assertEquals(2, data[0].length);
- assertEquals(1, data[1].length);
- assertEquals("abc", data[0][0]);
- assertEquals("def\nghi", data[0][1]);
- assertEquals("jkl", data[1][0]);
+ String[][] data = CSVUtils.parse("abc,\"def\nghi\"\njkl");
+ assertEquals(2, data.length);
+ assertEquals(2, data[0].length);
+ assertEquals(1, data[1].length);
+ assertEquals("abc", data[0][0]);
+ assertEquals("def\nghi", data[0][1]);
+ assertEquals("jkl", data[1][0]);
}
public void testParse4() throws IOException {
- String[][] data = CSVUtils.parse("abc,\"def\\\\nghi\"\njkl");
- assertEquals(2, data.length);
- assertEquals(2, data[0].length);
- assertEquals(1, data[1].length);
- assertEquals("abc", data[0][0]);
- // an escape char in quotes only escapes a delimiter, not itself
- assertEquals("def\\\\nghi", data[0][1]);
- assertEquals("jkl", data[1][0]);
+ String[][] data = CSVUtils.parse("abc,\"def\\\\nghi\"\njkl");
+ assertEquals(2, data.length);
+ assertEquals(2, data[0].length);
+ assertEquals(1, data[1].length);
+ assertEquals("abc", data[0][0]);
+ // an escape char in quotes only escapes a delimiter, not itself
+ assertEquals("def\\\\nghi", data[0][1]);
+ assertEquals("jkl", data[1][0]);
}
public void testParse5() throws IOException {
- String[][] data = CSVUtils.parse("abc,def\\nghi\njkl");
- assertEquals(2, data.length);
- assertEquals(2, data[0].length);
- assertEquals(1, data[1].length);
- assertEquals("abc", data[0][0]);
- assertEquals("def\\nghi", data[0][1]);
- assertEquals("jkl", data[1][0]);
+ String[][] data = CSVUtils.parse("abc,def\\nghi\njkl");
+ assertEquals(2, data.length);
+ assertEquals(2, data[0].length);
+ assertEquals(1, data[1].length);
+ assertEquals("abc", data[0][0]);
+ assertEquals("def\\nghi", data[0][1]);
+ assertEquals("jkl", data[1][0]);
}
-
+
public void testParse6() throws IOException {
- String[][] data = CSVUtils.parse("");
- // default strategy is CSV, which ignores empty lines
- assertEquals(0, data.length);
+ String[][] data = CSVUtils.parse("");
+ // default strategy is CSV, which ignores empty lines
+ assertEquals(0, data.length);
}
-
+
public void testParse7() throws IOException {
- boolean io = false;
- try {
- CSVUtils.parse(null);
- } catch (IllegalArgumentException e) {
- io = true;
- }
- assertTrue(io);
+ boolean io = false;
+ try {
+ CSVUtils.parse(null);
+ } catch (IllegalArgumentException e) {
+ io = true;
+ }
+ assertTrue(io);
}
-
+
public void testParseLine1() throws IOException {
- String[] data = CSVUtils.parseLine("abc,def,ghi");
- assertEquals(3, data.length);
- assertEquals("abc", data[0]);
- assertEquals("def", data[1]);
- assertEquals("ghi", data[2]);
+ String[] data = CSVUtils.parseLine("abc,def,ghi");
+ assertEquals(3, data.length);
+ assertEquals("abc", data[0]);
+ assertEquals("def", data[1]);
+ assertEquals("ghi", data[2]);
}
public void testParseLine2() throws IOException {
- String[] data = CSVUtils.parseLine("abc,def,ghi\n");
- assertEquals(3, data.length);
- assertEquals("abc", data[0]);
- assertEquals("def", data[1]);
- assertEquals("ghi", data[2]);
+ String[] data = CSVUtils.parseLine("abc,def,ghi\n");
+ assertEquals(3, data.length);
+ assertEquals("abc", data[0]);
+ assertEquals("def", data[1]);
+ assertEquals("ghi", data[2]);
}
public void testParseLine3() throws IOException {
- String[] data = CSVUtils.parseLine("abc,\"def,ghi\"");
- assertEquals(2, data.length);
- assertEquals("abc", data[0]);
- assertEquals("def,ghi", data[1]);
+ String[] data = CSVUtils.parseLine("abc,\"def,ghi\"");
+ assertEquals(2, data.length);
+ assertEquals("abc", data[0]);
+ assertEquals("def,ghi", data[1]);
}
public void testParseLine4() throws IOException {
- String[] data = CSVUtils.parseLine("abc,\"def\nghi\"");
- assertEquals(2, data.length);
- assertEquals("abc", data[0]);
- assertEquals("def\nghi", data[1]);
+ String[] data = CSVUtils.parseLine("abc,\"def\nghi\"");
+ assertEquals(2, data.length);
+ assertEquals("abc", data[0]);
+ assertEquals("def\nghi", data[1]);
}
-
+
public void testParseLine5() throws IOException {
- String[] data = CSVUtils.parseLine("");
- assertEquals(0, data.length);
- // assertEquals("", data[0]);
+ String[] data = CSVUtils.parseLine("");
+ assertEquals(0, data.length);
+ // assertEquals("", data[0]);
}
-
+
public void testParseLine6() throws IOException {
- boolean io = false;
- try {
- CSVUtils.parseLine(null);
- } catch (IllegalArgumentException e) {
- io = true;
- }
- assertTrue(io);
+ boolean io = false;
+ try {
+ CSVUtils.parseLine(null);
+ } catch (IllegalArgumentException e) {
+ io = true;
+ }
+ assertTrue(io);
}
-
+
public void testParseLine7() throws IOException {
- String[] res = CSVUtils.parseLine("");
- assertNotNull(res);
- assertEquals(0, res.length);
+ String[] res = CSVUtils.parseLine("");
+ assertNotNull(res);
+ assertEquals(0, res.length);
}
-
+
}
diff --git a/src/test/org/apache/commons/csv/CharBufferTest.java b/src/test/org/apache/commons/csv/CharBufferTest.java
index dc0d758e..24767445 100644
--- a/src/test/org/apache/commons/csv/CharBufferTest.java
+++ b/src/test/org/apache/commons/csv/CharBufferTest.java
@@ -21,7 +21,6 @@ package org.apache.commons.csv;
import junit.framework.TestCase;
/**
- *
* @author Ortwin Glück
*/
public class CharBufferTest extends TestCase {
@@ -31,14 +30,14 @@ public class CharBufferTest extends TestCase {
try {
cb = new CharBuffer(0);
fail("Should not be possible");
- } catch(IllegalArgumentException e) {
+ } catch (IllegalArgumentException e) {
// expected
}
-
+
cb = new CharBuffer(128);
assertEquals(0, cb.length());
}
-
+
public void testAppendChar() {
CharBuffer cb = new CharBuffer(1);
String expected = "";
@@ -49,59 +48,59 @@ public class CharBufferTest extends TestCase {
assertEquals(expected.length(), cb.length());
}
}
-
+
public void testAppendCharArray() {
CharBuffer cb = new CharBuffer(1);
char[] abcd = "abcd".toCharArray();
String expected = "";
- for (int i=0; i<10; i++) {
+ for (int i = 0; i < 10; i++) {
cb.append(abcd);
expected += "abcd";
assertEquals(expected, cb.toString());
- assertEquals(4*(i+1), cb.length());
+ assertEquals(4 * (i + 1), cb.length());
}
}
-
+
public void testAppendString() {
CharBuffer cb = new CharBuffer(1);
String abcd = "abcd";
String expected = "";
- for (int i=0; i<10; i++) {
+ for (int i = 0; i < 10; i++) {
cb.append(abcd);
expected += abcd;
assertEquals(expected, cb.toString());
- assertEquals(4*(i+1), cb.length());
+ assertEquals(4 * (i + 1), cb.length());
}
}
-
+
public void testAppendStringBuffer() {
CharBuffer cb = new CharBuffer(1);
StringBuffer abcd = new StringBuffer("abcd");
String expected = "";
- for (int i=0; i<10; i++) {
+ for (int i = 0; i < 10; i++) {
cb.append(abcd);
expected += "abcd";
assertEquals(expected, cb.toString());
- assertEquals(4*(i+1), cb.length());
+ assertEquals(4 * (i + 1), cb.length());
}
}
-
+
public void testAppendCharBuffer() {
CharBuffer cb = new CharBuffer(1);
CharBuffer abcd = new CharBuffer(17);
abcd.append("abcd");
String expected = "";
- for (int i=0; i<10; i++) {
+ for (int i = 0; i < 10; i++) {
cb.append(abcd);
expected += "abcd";
assertEquals(expected, cb.toString());
- assertEquals(4*(i+1), cb.length());
+ assertEquals(4 * (i + 1), cb.length());
}
}
-
+
public void testShrink() {
String data = "123456789012345678901234567890";
-
+
CharBuffer cb = new CharBuffer(data.length() + 100);
assertEquals(data.length() + 100, cb.capacity());
cb.append(data);
@@ -112,24 +111,24 @@ public class CharBufferTest extends TestCase {
assertEquals(data.length(), cb.length());
assertEquals(data, cb.toString());
}
-
+
//-- the following test cases have been adapted from the HttpComponents project
//-- written by Oleg Kalnichevski
-
+
public void testSimpleAppend() throws Exception {
CharBuffer buffer = new CharBuffer(16);
- assertEquals(16, buffer.capacity());
+ assertEquals(16, buffer.capacity());
assertEquals(0, buffer.length());
char[] b1 = buffer.getCharacters();
assertNotNull(b1);
assertEquals(0, b1.length);
assertEquals(0, buffer.length());
-
- char[] tmp = new char[] { '1', '2', '3', '4'};
+
+ char[] tmp = new char[]{'1', '2', '3', '4'};
buffer.append(tmp);
- assertEquals(16, buffer.capacity());
+ assertEquals(16, buffer.capacity());
assertEquals(4, buffer.length());
-
+
char[] b2 = buffer.getCharacters();
assertNotNull(b2);
assertEquals(4, b2.length);
@@ -137,35 +136,35 @@ public class CharBufferTest extends TestCase {
assertEquals(tmp[i], b2[i]);
}
assertEquals("1234", buffer.toString());
-
+
buffer.clear();
- assertEquals(16, buffer.capacity());
+ assertEquals(16, buffer.capacity());
assertEquals(0, buffer.length());
}
-
+
public void testAppendString2() throws Exception {
CharBuffer buffer = new CharBuffer(8);
buffer.append("stuff");
buffer.append(" and more stuff");
assertEquals("stuff and more stuff", buffer.toString());
}
-
+
public void testAppendNull() throws Exception {
CharBuffer buffer = new CharBuffer(8);
-
- buffer.append((StringBuffer)null);
- assertEquals("", buffer.toString());
-
- buffer.append((String)null);
+
+ buffer.append((StringBuffer) null);
assertEquals("", buffer.toString());
- buffer.append((CharBuffer)null);
+ buffer.append((String) null);
assertEquals("", buffer.toString());
- buffer.append((char[])null);
+ buffer.append((CharBuffer) null);
+ assertEquals("", buffer.toString());
+
+ buffer.append((char[]) null);
assertEquals("", buffer.toString());
}
-
+
public void testAppendCharArrayBuffer() throws Exception {
CharBuffer buffer1 = new CharBuffer(8);
buffer1.append(" and more stuff");
@@ -174,7 +173,7 @@ public class CharBufferTest extends TestCase {
buffer2.append(buffer1);
assertEquals("stuff and more stuff", buffer2.toString());
}
-
+
public void testAppendSingleChar() throws Exception {
CharBuffer buffer = new CharBuffer(4);
buffer.append('1');
@@ -185,7 +184,7 @@ public class CharBufferTest extends TestCase {
buffer.append('6');
assertEquals("123456", buffer.toString());
}
-
+
public void testProvideCapacity() throws Exception {
CharBuffer buffer = new CharBuffer(4);
buffer.provideCapacity(2);
diff --git a/src/test/org/apache/commons/csv/ExtendedBufferedReaderTest.java b/src/test/org/apache/commons/csv/ExtendedBufferedReaderTest.java
index 5f4ce2d3..82271429 100644
--- a/src/test/org/apache/commons/csv/ExtendedBufferedReaderTest.java
+++ b/src/test/org/apache/commons/csv/ExtendedBufferedReaderTest.java
@@ -25,139 +25,138 @@ import junit.framework.TestSuite;
/**
* ExtendedBufferedReaderTest
- *
*/
public class ExtendedBufferedReaderTest extends TestCase {
- // ======================================================
- // the test cases
- // ======================================================
-
- public void testConstructors() {
- ExtendedBufferedReader br = new ExtendedBufferedReader(new StringReader(""));
- br = new ExtendedBufferedReader(new StringReader(""), 10);
- }
-
- public void testReadLookahead1() throws Exception {
-
- assertEquals(ExtendedBufferedReader.END_OF_STREAM, getEBR("").read());
- ExtendedBufferedReader br = getEBR("1\n2\r3\n");
- assertEquals('1', br.lookAhead());
- assertEquals(ExtendedBufferedReader.UNDEFINED, br.readAgain());
- assertEquals('1', br.read());
- assertEquals('1', br.readAgain());
+ // ======================================================
+ // the test cases
+ // ======================================================
- assertEquals(0, br.getLineNumber());
- assertEquals('\n', br.lookAhead());
- assertEquals(0, br.getLineNumber());
- assertEquals('1', br.readAgain());
- assertEquals('\n', br.read());
- assertEquals(1, br.getLineNumber());
- assertEquals('\n', br.readAgain());
- assertEquals(1, br.getLineNumber());
-
- assertEquals('2', br.lookAhead());
- assertEquals(1, br.getLineNumber());
- assertEquals('\n', br.readAgain());
- assertEquals(1, br.getLineNumber());
- assertEquals('2', br.read());
- assertEquals('2', br.readAgain());
-
- assertEquals('\r', br.lookAhead());
- assertEquals('2', br.readAgain());
- assertEquals('\r', br.read());
- assertEquals('\r', br.readAgain());
-
- assertEquals('3', br.lookAhead());
- assertEquals('\r', br.readAgain());
- assertEquals('3', br.read());
- assertEquals('3', br.readAgain());
-
- assertEquals('\n', br.lookAhead());
- assertEquals(1, br.getLineNumber());
- assertEquals('3', br.readAgain());
- assertEquals('\n', br.read());
- assertEquals(2, br.getLineNumber());
- assertEquals('\n', br.readAgain());
- assertEquals(2, br.getLineNumber());
-
- assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.lookAhead());
- assertEquals('\n', br.readAgain());
- assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.read());
- assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.readAgain());
- assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.read());
- assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.lookAhead());
-
- }
-
+ public void testConstructors() {
+ ExtendedBufferedReader br = new ExtendedBufferedReader(new StringReader(""));
+ br = new ExtendedBufferedReader(new StringReader(""), 10);
+ }
- public void testReadLookahead2() throws Exception {
- char[] ref = new char[5];
- char[] res = new char[5];
-
- ExtendedBufferedReader br = getEBR("");
- assertEquals(0, br.read(res, 0, 0));
- assertTrue(Arrays.equals(res, ref));
-
- br = getEBR("abcdefg");
- ref[0] = 'a';
- ref[1] = 'b';
- ref[2] = 'c';
- assertEquals(3, br.read(res, 0, 3));
- assertTrue(Arrays.equals(res, ref));
- assertEquals('c', br.readAgain());
-
- assertEquals('d', br.lookAhead());
- ref[4] = 'd';
- assertEquals(1, br.read(res, 4, 1));
- assertTrue(Arrays.equals(res, ref));
- assertEquals('d', br.readAgain());
-
- }
-
- public void testReadLine() throws Exception {
- ExtendedBufferedReader br = getEBR("");
- assertTrue(br.readLine() == null);
-
- br = getEBR("\n");
- assertTrue(br.readLine().equals(""));
- assertTrue(br.readLine() == null);
-
- br = getEBR("foo\n\nhello");
- assertEquals(0, br.getLineNumber());
- assertTrue(br.readLine().equals("foo"));
- assertEquals(1, br.getLineNumber());
- assertTrue(br.readLine().equals(""));
- assertEquals(2, br.getLineNumber());
- assertTrue(br.readLine().equals("hello"));
- assertEquals(3, br.getLineNumber());
- assertTrue(br.readLine() == null);
- assertEquals(3, br.getLineNumber());
-
- br = getEBR("foo\n\nhello");
- assertEquals('f', br.read());
- assertEquals('o', br.lookAhead());
- assertTrue(br.readLine().equals("oo"));
- assertEquals(1, br.getLineNumber());
- assertEquals('\n', br.lookAhead());
- assertTrue(br.readLine().equals(""));
- assertEquals(2, br.getLineNumber());
- assertEquals('h', br.lookAhead());
- assertTrue(br.readLine().equals("hello"));
- assertTrue(br.readLine() == null);
- assertEquals(3, br.getLineNumber());
-
-
- br = getEBR("foo\rbaar\r\nfoo");
- assertTrue(br.readLine().equals("foo"));
- assertEquals('b', br.lookAhead());
- assertTrue(br.readLine().equals("baar"));
- assertEquals('f', br.lookAhead());
- assertTrue(br.readLine().equals("foo"));
- assertTrue(br.readLine() == null);
- }
-
- private ExtendedBufferedReader getEBR(String s) {
- return new ExtendedBufferedReader(new StringReader(s));
- }
+ public void testReadLookahead1() throws Exception {
+
+ assertEquals(ExtendedBufferedReader.END_OF_STREAM, getEBR("").read());
+ ExtendedBufferedReader br = getEBR("1\n2\r3\n");
+ assertEquals('1', br.lookAhead());
+ assertEquals(ExtendedBufferedReader.UNDEFINED, br.readAgain());
+ assertEquals('1', br.read());
+ assertEquals('1', br.readAgain());
+
+ assertEquals(0, br.getLineNumber());
+ assertEquals('\n', br.lookAhead());
+ assertEquals(0, br.getLineNumber());
+ assertEquals('1', br.readAgain());
+ assertEquals('\n', br.read());
+ assertEquals(1, br.getLineNumber());
+ assertEquals('\n', br.readAgain());
+ assertEquals(1, br.getLineNumber());
+
+ assertEquals('2', br.lookAhead());
+ assertEquals(1, br.getLineNumber());
+ assertEquals('\n', br.readAgain());
+ assertEquals(1, br.getLineNumber());
+ assertEquals('2', br.read());
+ assertEquals('2', br.readAgain());
+
+ assertEquals('\r', br.lookAhead());
+ assertEquals('2', br.readAgain());
+ assertEquals('\r', br.read());
+ assertEquals('\r', br.readAgain());
+
+ assertEquals('3', br.lookAhead());
+ assertEquals('\r', br.readAgain());
+ assertEquals('3', br.read());
+ assertEquals('3', br.readAgain());
+
+ assertEquals('\n', br.lookAhead());
+ assertEquals(1, br.getLineNumber());
+ assertEquals('3', br.readAgain());
+ assertEquals('\n', br.read());
+ assertEquals(2, br.getLineNumber());
+ assertEquals('\n', br.readAgain());
+ assertEquals(2, br.getLineNumber());
+
+ assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.lookAhead());
+ assertEquals('\n', br.readAgain());
+ assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.read());
+ assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.readAgain());
+ assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.read());
+ assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.lookAhead());
+
+ }
+
+
+ public void testReadLookahead2() throws Exception {
+ char[] ref = new char[5];
+ char[] res = new char[5];
+
+ ExtendedBufferedReader br = getEBR("");
+ assertEquals(0, br.read(res, 0, 0));
+ assertTrue(Arrays.equals(res, ref));
+
+ br = getEBR("abcdefg");
+ ref[0] = 'a';
+ ref[1] = 'b';
+ ref[2] = 'c';
+ assertEquals(3, br.read(res, 0, 3));
+ assertTrue(Arrays.equals(res, ref));
+ assertEquals('c', br.readAgain());
+
+ assertEquals('d', br.lookAhead());
+ ref[4] = 'd';
+ assertEquals(1, br.read(res, 4, 1));
+ assertTrue(Arrays.equals(res, ref));
+ assertEquals('d', br.readAgain());
+
+ }
+
+ public void testReadLine() throws Exception {
+ ExtendedBufferedReader br = getEBR("");
+ assertTrue(br.readLine() == null);
+
+ br = getEBR("\n");
+ assertTrue(br.readLine().equals(""));
+ assertTrue(br.readLine() == null);
+
+ br = getEBR("foo\n\nhello");
+ assertEquals(0, br.getLineNumber());
+ assertTrue(br.readLine().equals("foo"));
+ assertEquals(1, br.getLineNumber());
+ assertTrue(br.readLine().equals(""));
+ assertEquals(2, br.getLineNumber());
+ assertTrue(br.readLine().equals("hello"));
+ assertEquals(3, br.getLineNumber());
+ assertTrue(br.readLine() == null);
+ assertEquals(3, br.getLineNumber());
+
+ br = getEBR("foo\n\nhello");
+ assertEquals('f', br.read());
+ assertEquals('o', br.lookAhead());
+ assertTrue(br.readLine().equals("oo"));
+ assertEquals(1, br.getLineNumber());
+ assertEquals('\n', br.lookAhead());
+ assertTrue(br.readLine().equals(""));
+ assertEquals(2, br.getLineNumber());
+ assertEquals('h', br.lookAhead());
+ assertTrue(br.readLine().equals("hello"));
+ assertTrue(br.readLine() == null);
+ assertEquals(3, br.getLineNumber());
+
+
+ br = getEBR("foo\rbaar\r\nfoo");
+ assertTrue(br.readLine().equals("foo"));
+ assertEquals('b', br.lookAhead());
+ assertTrue(br.readLine().equals("baar"));
+ assertEquals('f', br.lookAhead());
+ assertTrue(br.readLine().equals("foo"));
+ assertTrue(br.readLine() == null);
+ }
+
+ private ExtendedBufferedReader getEBR(String s) {
+ return new ExtendedBufferedReader(new StringReader(s));
+ }
}
diff --git a/src/test/org/apache/commons/csv/writer/CSVConfigGuesserTest.java b/src/test/org/apache/commons/csv/writer/CSVConfigGuesserTest.java
index 90000080..d2d18e4e 100644
--- a/src/test/org/apache/commons/csv/writer/CSVConfigGuesserTest.java
+++ b/src/test/org/apache/commons/csv/writer/CSVConfigGuesserTest.java
@@ -41,10 +41,10 @@ public class CSVConfigGuesserTest extends TestCase {
guesser.setHasFieldHeader(true);
assertEquals(true, guesser.hasFieldHeader());
}
+
/**
* Test a format like
- * 1234 ; abcd ; 1234 ;
- *
+ * 1234 ; abcd ; 1234 ;
*/
public void testConfigGuess1() {
CSVConfig expected = new CSVConfig();
@@ -67,11 +67,11 @@ public class CSVConfigGuesserTest extends TestCase {
assertEquals(expected.getFields().length, guessed.getFields().length);
assertEquals(expected.getFields()[0].getSize(), guessed.getFields()[0].getSize());
}
+
/**
* Test a format like
- * 1234,123123,12312312,213123
- * 1,2,3,4
- *
+ * 1234,123123,12312312,213123
+ * 1,2,3,4
*/
public void testConfigGuess2() {
CSVConfig expected = new CSVConfig();
diff --git a/src/test/org/apache/commons/csv/writer/CSVConfigTest.java b/src/test/org/apache/commons/csv/writer/CSVConfigTest.java
index 0835776c..482167ec 100644
--- a/src/test/org/apache/commons/csv/writer/CSVConfigTest.java
+++ b/src/test/org/apache/commons/csv/writer/CSVConfigTest.java
@@ -29,7 +29,7 @@ import junit.framework.TestCase;
* @version $Id: $
*/
public class CSVConfigTest extends TestCase {
-
+
public void testFixedWith() {
CSVConfig config = new CSVConfig();
@@ -37,13 +37,13 @@ public class CSVConfigTest extends TestCase {
config.setFixedWidth(true);
assertEquals(true, config.isFixedWidth());
}
-
+
public void testFields() {
CSVConfig config = new CSVConfig();
assertEquals(0, config.getFields().length);
- config.setFields((CSVField[])null);
+ config.setFields((CSVField[]) null);
assertEquals(0, config.getFields().length);
- config.setFields((Collection)null);
+ config.setFields((Collection) null);
assertEquals(0, config.getFields().length);
CSVField field = new CSVField();
field.setName("field1");
@@ -53,7 +53,7 @@ public class CSVConfigTest extends TestCase {
assertEquals(null, config.getField("field11"));
assertEquals(field, config.getField("field1"));
}
-
+
public void testFill() {
CSVConfig config = new CSVConfig();
assertEquals(CSVConfig.FILLNONE, config.getFill());
@@ -65,7 +65,7 @@ public class CSVConfigTest extends TestCase {
config.setFillChar('m');
assertEquals('m', config.getFillChar());
}
-
+
public void testDelimiter() {
CSVConfig config = new CSVConfig();
assertEquals(',', config.getDelimiter());
@@ -75,7 +75,7 @@ public class CSVConfigTest extends TestCase {
config.setIgnoreDelimiter(true);
assertEquals(true, config.isDelimiterIgnored());
}
-
+
public void testValueDelimiter() {
CSVConfig config = new CSVConfig();
assertEquals('"', config.getValueDelimiter());
@@ -85,14 +85,14 @@ public class CSVConfigTest extends TestCase {
config.setIgnoreValueDelimiter(false);
assertEquals(false, config.isValueDelimiterIgnored());
}
-
+
public void testFieldHeader() {
CSVConfig config = new CSVConfig();
assertEquals(false, config.isFieldHeader());
config.setFieldHeader(true);
assertEquals(true, config.isFieldHeader());
}
-
+
public void testTrimEnd() {
CSVConfig config = new CSVConfig();
assertEquals(false, config.isEndTrimmed());
diff --git a/src/test/org/apache/commons/csv/writer/CSVFieldTest.java b/src/test/org/apache/commons/csv/writer/CSVFieldTest.java
index a5d216f6..df5796b4 100644
--- a/src/test/org/apache/commons/csv/writer/CSVFieldTest.java
+++ b/src/test/org/apache/commons/csv/writer/CSVFieldTest.java
@@ -21,7 +21,6 @@ package org.apache.commons.csv.writer;
import junit.framework.TestCase;
/**
- *
* @author Martin van den Bemt
* @version $Id: $
*/
@@ -41,7 +40,7 @@ public class CSVFieldTest extends TestCase {
assertEquals("name", field.getName());
assertEquals(10, field.getSize());
}
-
+
public void testFill() {
CSVField field = new CSVField();
assertEquals(CSVConfig.FILLNONE, field.getFill());
diff --git a/src/test/org/apache/commons/csv/writer/CSVWriterTest.java b/src/test/org/apache/commons/csv/writer/CSVWriterTest.java
index 39b989b6..9de5228a 100644
--- a/src/test/org/apache/commons/csv/writer/CSVWriterTest.java
+++ b/src/test/org/apache/commons/csv/writer/CSVWriterTest.java
@@ -26,17 +26,17 @@ import junit.framework.TestCase;
/**
* The testcase for the csv writer.
- *
+ *
* @author Martin van den Bemt
* @version $Id: $
*/
public class CSVWriterTest extends TestCase {
private Map map;
-
+
protected void setUp() throws Exception {
super.setUp();
-
+
map = new HashMap();
map.put("field1", "12345");
map.put("field2", "1234");