From 1166ca605bcc035654771f1ddc1092d86f2ec1e8 Mon Sep 17 00:00:00 2001 From: Jacopo Cappellato Date: Tue, 1 Feb 2011 08:46:00 +0000 Subject: [PATCH] No functional changes are contained in this commit: reformatted Java code to fix several formatting inconsistencies (between classes and within the same class); sorry for the big commit, but I have preferred to isolate into one commit all the formatting changes. git-svn-id: https://svn.apache.org/repos/asf/commons/sandbox/csv/trunk@1065950 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/commons/csv/CSVParser.java | 1095 +++++++++-------- .../org/apache/commons/csv/CSVPrinter.java | 505 ++++---- .../org/apache/commons/csv/CSVStrategy.java | 180 +-- src/java/org/apache/commons/csv/CSVUtils.java | 102 +- .../org/apache/commons/csv/CharBuffer.java | 73 +- .../commons/csv/ExtendedBufferedReader.java | 401 +++--- .../apache/commons/csv/writer/CSVConfig.java | 127 +- .../commons/csv/writer/CSVConfigGuesser.java | 57 +- .../apache/commons/csv/writer/CSVField.java | 13 +- .../apache/commons/csv/writer/CSVWriter.java | 33 +- .../org/apache/commons/csv/CSVParserTest.java | 1036 ++++++++-------- .../apache/commons/csv/CSVPrinterTest.java | 381 +++--- .../apache/commons/csv/CSVStrategyTest.java | 112 +- .../org/apache/commons/csv/CSVUtilsTest.java | 178 +-- .../apache/commons/csv/CharBufferTest.java | 77 +- .../csv/ExtendedBufferedReaderTest.java | 259 ++-- .../csv/writer/CSVConfigGuesserTest.java | 10 +- .../commons/csv/writer/CSVConfigTest.java | 18 +- .../commons/csv/writer/CSVFieldTest.java | 3 +- .../commons/csv/writer/CSVWriterTest.java | 6 +- 20 files changed, 2420 insertions(+), 2246 deletions(-) diff --git a/src/java/org/apache/commons/csv/CSVParser.java b/src/java/org/apache/commons/csv/CSVParser.java index 1c20de90..115a8e08 100644 --- a/src/java/org/apache/commons/csv/CSVParser.java +++ b/src/java/org/apache/commons/csv/CSVParser.java @@ -28,578 +28,609 @@ import java.util.ArrayList; * * Because CSV appears in many different dialects, the parser supports many * configuration settings by allowing the specification of a {@link CSVStrategy}. - * + * *

Parsing of a csv-string having tabs as separators, * '"' as an optional value encapsulator, and comments starting with '#':

*
- *  String[][] data = 
+ *  String[][] data =
  *   (new CSVParser(new StringReader("a\tb\nc\td"), new CSVStrategy('\t','"','#'))).getAllValues();
  * 
- * + * *

Parsing of a csv-string in Excel CSV format

*
  *  String[][] data =
  *   (new CSVParser(new StringReader("a;b\nc;d"), CSVStrategy.EXCEL_STRATEGY)).getAllValues();
  * 
- * + * *

* Internal parser state is completely covered by the strategy * and the reader-state.

- * - *

see package documentation + * + *

see package documentation * for more details

*/ public class CSVParser { - /** length of the initial token (content-)buffer */ - private static final int INITIAL_TOKEN_LENGTH = 50; - - // the token types - /** Token has no valid content, i.e. is in its initialized state. */ - protected static final int TT_INVALID = -1; - /** Token with content, at beginning or in the middle of a line. */ - protected static final int TT_TOKEN = 0; - /** Token (which can have content) when end of file is reached. */ - protected static final int TT_EOF = 1; - /** Token with content when end of a line is reached. */ - protected static final int TT_EORECORD = 2; + /** + * length of the initial token (content-)buffer + */ + private static final int INITIAL_TOKEN_LENGTH = 50; - /** Immutable empty String array. */ - private static final String[] EMPTY_STRING_ARRAY = new String[0]; - - // the input stream - private final ExtendedBufferedReader in; + // the token types + /** + * Token has no valid content, i.e. is in its initialized state. + */ + protected static final int TT_INVALID = -1; + /** + * Token with content, at beginning or in the middle of a line. + */ + protected static final int TT_TOKEN = 0; + /** + * Token (which can have content) when end of file is reached. + */ + protected static final int TT_EOF = 1; + /** + * Token with content when end of a line is reached. + */ + protected static final int TT_EORECORD = 2; - private final CSVStrategy strategy; - - // the following objects are shared to reduce garbage - /** A record buffer for getLine(). Grows as necessary and is reused. */ - private final ArrayList record = new ArrayList(); - private final Token reusableToken = new Token(); - private final CharBuffer wsBuf = new CharBuffer(); - private final CharBuffer code = new CharBuffer(4); + /** + * Immutable empty String array. + */ + private static final String[] EMPTY_STRING_ARRAY = new String[0]; - - /** - * Token is an internal token representation. - * - * It is used as contract between the lexer and the parser. - */ - static class Token { - /** Token type, see TT_xxx constants. */ - int type = TT_INVALID; - /** The content buffer. */ - CharBuffer content = new CharBuffer(INITIAL_TOKEN_LENGTH); - /** Token ready flag: indicates a valid token with content (ready for the parser). */ - boolean isReady; - - Token reset() { - content.clear(); - type = TT_INVALID; - isReady = false; - return this; - } - } - - // ====================================================== - // the constructor - // ====================================================== - - /** - * Default strategy for the parser follows the default {@link CSVStrategy}. - * - * @param input an InputStream containing "csv-formatted" stream - * @deprecated use {@link #CSVParser(Reader)}. - */ - public CSVParser(InputStream input) { - this(new InputStreamReader(input)); - } - - /** - * CSV parser using the default {@link CSVStrategy}. - * - * @param input a Reader containing "csv-formatted" input - */ - public CSVParser(Reader input) { - this(input, (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone()); - } - - /** - * Customized value delimiter parser. - * - * The parser follows the default {@link CSVStrategy} - * except for the delimiter setting. - * - * @param input a Reader based on "csv-formatted" input - * @param delimiter a Char used for value separation - * @deprecated use {@link #CSVParser(Reader,CSVStrategy)}. - */ - public CSVParser(Reader input, char delimiter) { - this(input, delimiter, '"', CSVStrategy.COMMENTS_DISABLED); - } - - /** - * Customized csv parser. - * - * The parser parses according to the given CSV dialect settings. - * Leading whitespaces are truncated, unicode escapes are - * not interpreted and empty lines are ignored. - * - * @param input a Reader based on "csv-formatted" input - * @param delimiter a Char used for value separation - * @param encapsulator a Char used as value encapsulation marker - * @param commentStart a Char used for comment identification - * @deprecated use {@link #CSVParser(Reader,CSVStrategy)}. - */ - public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) { - this(input, new CSVStrategy(delimiter, encapsulator, commentStart)); - } + // the input stream + private final ExtendedBufferedReader in; - /** - * Customized CSV parser using the given {@link CSVStrategy} - * - * @param input a Reader containing "csv-formatted" input - * @param strategy the CSVStrategy used for CSV parsing - */ - public CSVParser(Reader input, CSVStrategy strategy) { - this.in = new ExtendedBufferedReader(input); - this.strategy = strategy; - } - - // ====================================================== - // the parser - // ====================================================== - - /** - * Parses the CSV according to the given strategy - * and returns the content as an array of records - * (whereas records are arrays of single values). - *

- * The returned content starts at the current parse-position in - * the stream. - * - * @return matrix of records x values ('null' when end of file) - * @throws IOException on parse error or input read-failure - */ - public String[][] getAllValues() throws IOException { - ArrayList records = new ArrayList(); - String[] values; - String[][] ret = null; - while ((values = getLine()) != null) { - records.add(values); + private final CSVStrategy strategy; + + // the following objects are shared to reduce garbage + /** + * A record buffer for getLine(). Grows as necessary and is reused. + */ + private final ArrayList record = new ArrayList(); + private final Token reusableToken = new Token(); + private final CharBuffer wsBuf = new CharBuffer(); + private final CharBuffer code = new CharBuffer(4); + + + /** + * Token is an internal token representation. + *

+ * It is used as contract between the lexer and the parser. + */ + static class Token { + /** + * Token type, see TT_xxx constants. + */ + int type = TT_INVALID; + /** + * The content buffer. + */ + CharBuffer content = new CharBuffer(INITIAL_TOKEN_LENGTH); + /** + * Token ready flag: indicates a valid token with content (ready for the parser). + */ + boolean isReady; + + Token reset() { + content.clear(); + type = TT_INVALID; + isReady = false; + return this; + } } - if (records.size() > 0) { - ret = new String[records.size()][]; - records.toArray(ret); + + // ====================================================== + // the constructor + // ====================================================== + + /** + * Default strategy for the parser follows the default {@link CSVStrategy}. + * + * @param input an InputStream containing "csv-formatted" stream + * @deprecated use {@link #CSVParser(Reader)}. + */ + public CSVParser(InputStream input) { + this(new InputStreamReader(input)); } - return ret; - } - - /** - * Parses the CSV according to the given strategy - * and returns the next csv-value as string. - * - * @return next value in the input stream ('null' when end of file) - * @throws IOException on parse error or input read-failure - */ - public String nextValue() throws IOException { - Token tkn = nextToken(); - String ret = null; - switch (tkn.type) { - case TT_TOKEN: - case TT_EORECORD: - ret = tkn.content.toString(); - break; - case TT_EOF: - ret = null; - break; - case TT_INVALID: - default: - // error no token available (or error) - throw new IOException( - "(line " + getLineNumber() - + ") invalid parse sequence"); - // unreachable: break; + + /** + * CSV parser using the default {@link CSVStrategy}. + * + * @param input a Reader containing "csv-formatted" input + */ + public CSVParser(Reader input) { + this(input, (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone()); } - return ret; - } - - /** - * Parses from the current point in the stream til - * the end of the current line. - * - * @return array of values til end of line - * ('null' when end of file has been reached) - * @throws IOException on parse error or input read-failure - */ - public String[] getLine() throws IOException { - String[] ret = EMPTY_STRING_ARRAY; - record.clear(); - while (true) { - reusableToken.reset(); - nextToken(reusableToken); - switch (reusableToken.type) { + + /** + * Customized value delimiter parser. + *

+ * The parser follows the default {@link CSVStrategy} + * except for the delimiter setting. + * + * @param input a Reader based on "csv-formatted" input + * @param delimiter a Char used for value separation + * @deprecated use {@link #CSVParser(Reader, CSVStrategy)}. + */ + public CSVParser(Reader input, char delimiter) { + this(input, delimiter, '"', CSVStrategy.COMMENTS_DISABLED); + } + + /** + * Customized csv parser. + *

+ * The parser parses according to the given CSV dialect settings. + * Leading whitespaces are truncated, unicode escapes are + * not interpreted and empty lines are ignored. + * + * @param input a Reader based on "csv-formatted" input + * @param delimiter a Char used for value separation + * @param encapsulator a Char used as value encapsulation marker + * @param commentStart a Char used for comment identification + * @deprecated use {@link #CSVParser(Reader, CSVStrategy)}. + */ + public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) { + this(input, new CSVStrategy(delimiter, encapsulator, commentStart)); + } + + /** + * Customized CSV parser using the given {@link CSVStrategy} + * + * @param input a Reader containing "csv-formatted" input + * @param strategy the CSVStrategy used for CSV parsing + */ + public CSVParser(Reader input, CSVStrategy strategy) { + this.in = new ExtendedBufferedReader(input); + this.strategy = strategy; + } + + // ====================================================== + // the parser + // ====================================================== + + /** + * Parses the CSV according to the given strategy + * and returns the content as an array of records + * (whereas records are arrays of single values). + *

+ * The returned content starts at the current parse-position in + * the stream. + * + * @return matrix of records x values ('null' when end of file) + * @throws IOException on parse error or input read-failure + */ + public String[][] getAllValues() throws IOException { + ArrayList records = new ArrayList(); + String[] values; + String[][] ret = null; + while ((values = getLine()) != null) { + records.add(values); + } + if (records.size() > 0) { + ret = new String[records.size()][]; + records.toArray(ret); + } + return ret; + } + + /** + * Parses the CSV according to the given strategy + * and returns the next csv-value as string. + * + * @return next value in the input stream ('null' when end of file) + * @throws IOException on parse error or input read-failure + */ + public String nextValue() throws IOException { + Token tkn = nextToken(); + String ret = null; + switch (tkn.type) { case TT_TOKEN: - record.add(reusableToken.content.toString()); - break; case TT_EORECORD: - record.add(reusableToken.content.toString()); + ret = tkn.content.toString(); break; case TT_EOF: - if (reusableToken.isReady) { - record.add(reusableToken.content.toString()); - } else { - ret = null; - } + ret = null; break; case TT_INVALID: default: - // error: throw IOException - throw new IOException("(line " + getLineNumber() + ") invalid parse sequence"); - // unreachable: break; - } - if (reusableToken.type != TT_TOKEN) { - break; + // error no token available (or error) + throw new IOException( + "(line " + getLineNumber() + + ") invalid parse sequence"); + // unreachable: break; } + return ret; } - if (!record.isEmpty()) { - ret = (String[]) record.toArray(new String[record.size()]); - } - return ret; - } - - /** - * Returns the current line number in the input stream. - * - * ATTENTION: in case your csv has multiline-values the returned - * number does not correspond to the record-number - * - * @return current line number - */ - public int getLineNumber() { - return in.getLineNumber(); - } - - // ====================================================== - // the lexer(s) - // ====================================================== - - /** - * Convenience method for nextToken(null). - */ - protected Token nextToken() throws IOException { - return nextToken(new Token()); - } - - /** - * Returns the next token. - * - * A token corresponds to a term, a record change or an - * end-of-file indicator. - * - * @param tkn an existing Token object to reuse. The caller is responsible to initialize the - * Token. - * @return the next token found - * @throws IOException on stream access error - */ - protected Token nextToken(Token tkn) throws IOException { - wsBuf.clear(); // reuse - - // get the last read char (required for empty line detection) - int lastChar = in.readAgain(); - - // read the next char and set eol - /* note: unfortunately isEndOfLine may consumes a character silently. - * this has no effect outside of the method. so a simple workaround - * is to call 'readAgain' on the stream... - * uh: might using objects instead of base-types (jdk1.5 autoboxing!) + + /** + * Parses from the current point in the stream til + * the end of the current line. + * + * @return array of values til end of line + * ('null' when end of file has been reached) + * @throws IOException on parse error or input read-failure */ - int c = in.read(); - boolean eol = isEndOfLine(c); - c = in.readAgain(); - - // empty line detection: eol AND (last char was EOL or beginning) - while (strategy.getIgnoreEmptyLines() && eol - && (lastChar == '\n' - || lastChar == '\r' - || lastChar == ExtendedBufferedReader.UNDEFINED) - && !isEndOfFile(lastChar)) { - // go on char ahead ... - lastChar = c; - c = in.read(); - eol = isEndOfLine(c); - c = in.readAgain(); - // reached end of file without any content (empty line at the end) - if (isEndOfFile(c)) { - tkn.type = TT_EOF; - return tkn; - } - } - - // did we reach eof during the last iteration already ? TT_EOF - if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) { - tkn.type = TT_EOF; - return tkn; - } - - // important: make sure a new char gets consumed in each iteration - while (!tkn.isReady && tkn.type != TT_EOF) { - // ignore whitespaces at beginning of a token - while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) { - wsBuf.append((char) c); - c = in.read(); - eol = isEndOfLine(c); - } - // ok, start of token reached: comment, encapsulated, or token - if (c == strategy.getCommentStart()) { - // ignore everything till end of line and continue (incr linecount) - in.readLine(); - tkn = nextToken(tkn.reset()); - } else if (c == strategy.getDelimiter()) { - // empty token return TT_TOKEN("") - tkn.type = TT_TOKEN; - tkn.isReady = true; - } else if (eol) { - // empty token return TT_EORECORD("") - //noop: tkn.content.append(""); - tkn.type = TT_EORECORD; - tkn.isReady = true; - } else if (c == strategy.getEncapsulator()) { - // consume encapsulated token - encapsulatedTokenLexer(tkn, c); - } else if (isEndOfFile(c)) { - // end of file return TT_EOF() - //noop: tkn.content.append(""); - tkn.type = TT_EOF; - tkn.isReady = true; - } else { - // next token must be a simple token - // add removed blanks when not ignoring whitespace chars... - if (!strategy.getIgnoreLeadingWhitespaces()) { - tkn.content.append(wsBuf); - } - simpleTokenLexer(tkn, c); - } - } - return tkn; - } - - /** - * A simple token lexer - * - * Simple token are tokens which are not surrounded by encapsulators. - * A simple token might contain escaped delimiters (as \, or \;). The - * token is finished when one of the following conditions become true: - *

- * - * @param tkn the current token - * @param c the current character - * @return the filled token - * - * @throws IOException on stream access error - */ - private Token simpleTokenLexer(Token tkn, int c) throws IOException { - for (;;) { - if (isEndOfLine(c)) { - // end of record - tkn.type = TT_EORECORD; - tkn.isReady = true; - break; - } else if (isEndOfFile(c)) { - // end of file - tkn.type = TT_EOF; - tkn.isReady = true; - break; - } else if (c == strategy.getDelimiter()) { - // end of token - tkn.type = TT_TOKEN; - tkn.isReady = true; - break; - } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') { - // interpret unicode escaped chars (like \u0070 -> p) - tkn.content.append((char) unicodeEscapeLexer(c)); - } else if (c == strategy.getEscape()) { - tkn.content.append((char)readEscape(c)); - } else { - tkn.content.append((char) c); - } - - c = in.read(); - } - - if (strategy.getIgnoreTrailingWhitespaces()) { - tkn.content.trimTrailingWhitespace(); - } - - return tkn; - } - - - /** - * An encapsulated token lexer - * - * Encapsulated tokens are surrounded by the given encapsulating-string. - * The encapsulator itself might be included in the token using a - * doubling syntax (as "", '') or using escaping (as in \", \'). - * Whitespaces before and after an encapsulated token are ignored. - * - * @param tkn the current token - * @param c the current character - * @return a valid token object - * @throws IOException on invalid state - */ - private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException { - // save current line - int startLineNumber = getLineNumber(); - // ignore the given delimiter - // assert c == delimiter; - for (;;) { - c = in.read(); - - if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead()=='u') { - tkn.content.append((char) unicodeEscapeLexer(c)); - } else if (c == strategy.getEscape()) { - tkn.content.append((char)readEscape(c)); - } else if (c == strategy.getEncapsulator()) { - if (in.lookAhead() == strategy.getEncapsulator()) { - // double or escaped encapsulator -> add single encapsulator to token - c = in.read(); - tkn.content.append((char) c); - } else { - // token finish mark (encapsulator) reached: ignore whitespace till delimiter - for (;;) { - c = in.read(); - if (c == strategy.getDelimiter()) { - tkn.type = TT_TOKEN; - tkn.isReady = true; - return tkn; - } else if (isEndOfFile(c)) { - tkn.type = TT_EOF; - tkn.isReady = true; - return tkn; - } else if (isEndOfLine(c)) { - // ok eo token reached - tkn.type = TT_EORECORD; - tkn.isReady = true; - return tkn; - } else if (!isWhitespace(c)) { - // error invalid char between token and next delimiter - throw new IOException( - "(line " + getLineNumber() - + ") invalid char between encapsulated token end delimiter" - ); + public String[] getLine() throws IOException { + String[] ret = EMPTY_STRING_ARRAY; + record.clear(); + while (true) { + reusableToken.reset(); + nextToken(reusableToken); + switch (reusableToken.type) { + case TT_TOKEN: + record.add(reusableToken.content.toString()); + break; + case TT_EORECORD: + record.add(reusableToken.content.toString()); + break; + case TT_EOF: + if (reusableToken.isReady) { + record.add(reusableToken.content.toString()); + } else { + ret = null; + } + break; + case TT_INVALID: + default: + // error: throw IOException + throw new IOException("(line " + getLineNumber() + ") invalid parse sequence"); + // unreachable: break; + } + if (reusableToken.type != TT_TOKEN) { + break; } - } } - } else if (isEndOfFile(c)) { - // error condition (end of file before end of token) - throw new IOException( - "(startline " + startLineNumber + ")" - + "eof reached before encapsulated token finished" - ); - } else { - // consume character - tkn.content.append((char) c); - } - } - } - - - /** - * Decodes Unicode escapes. - * - * Interpretation of "\\uXXXX" escape sequences - * where XXXX is a hex-number. - * @param c current char which is discarded because it's the "\\" of "\\uXXXX" - * @return the decoded character - * @throws IOException on wrong unicode escape sequence or read error - */ - protected int unicodeEscapeLexer(int c) throws IOException { - int ret = 0; - // ignore 'u' (assume c==\ now) and read 4 hex digits - c = in.read(); - code.clear(); - try { - for (int i = 0; i < 4; i++) { - c = in.read(); - if (isEndOfFile(c) || isEndOfLine(c)) { - throw new NumberFormatException("number too short"); + if (!record.isEmpty()) { + ret = (String[]) record.toArray(new String[record.size()]); } - code.append((char) c); - } - ret = Integer.parseInt(code.toString(), 16); - } catch (NumberFormatException e) { - throw new IOException( - "(line " + getLineNumber() + ") Wrong unicode escape sequence found '" - + code.toString() + "'" + e.toString()); + return ret; } - return ret; - } - private int readEscape(int c) throws IOException { - // assume c is the escape char (normally a backslash) - c = in.read(); - int out; - switch (c) { - case 'r': out='\r'; break; - case 'n': out='\n'; break; - case 't': out='\t'; break; - case 'b': out='\b'; break; - case 'f': out='\f'; break; - default : out=c; + /** + * Returns the current line number in the input stream. + *

+ * ATTENTION: in case your csv has multiline-values the returned + * number does not correspond to the record-number + * + * @return current line number + */ + public int getLineNumber() { + return in.getLineNumber(); } - return out; - } - - // ====================================================== - // strategies - // ====================================================== - - /** - * Obtain the specified CSV Strategy. This should not be modified. - * - * @return strategy currently being used - */ - public CSVStrategy getStrategy() { - return this.strategy; - } - - // ====================================================== - // Character class checker - // ====================================================== - - /** - * @return true if the given char is a whitespace character - */ - private boolean isWhitespace(int c) { - return Character.isWhitespace((char) c) && (c != strategy.getDelimiter()); - } - - /** - * Greedy - accepts \n, \r and \r\n - * This checker consumes silently the second control-character... - * - * @return true if the given character is a line-terminator - */ - private boolean isEndOfLine(int c) throws IOException { - // check if we have \r\n... - if (c == '\r') { - if (in.lookAhead() == '\n') { - // note: does not change c outside of this method !! + + // ====================================================== + // the lexer(s) + // ====================================================== + + /** + * Convenience method for nextToken(null). + */ + protected Token nextToken() throws IOException { + return nextToken(new Token()); + } + + /** + * Returns the next token. + *

+ * A token corresponds to a term, a record change or an + * end-of-file indicator. + * + * @param tkn an existing Token object to reuse. The caller is responsible to initialize the + * Token. + * @return the next token found + * @throws IOException on stream access error + */ + protected Token nextToken(Token tkn) throws IOException { + wsBuf.clear(); // reuse + + // get the last read char (required for empty line detection) + int lastChar = in.readAgain(); + + // read the next char and set eol + /* note: unfortunately isEndOfLine may consumes a character silently. + * this has no effect outside of the method. so a simple workaround + * is to call 'readAgain' on the stream... + * uh: might using objects instead of base-types (jdk1.5 autoboxing!) + */ + int c = in.read(); + boolean eol = isEndOfLine(c); + c = in.readAgain(); + + // empty line detection: eol AND (last char was EOL or beginning) + while (strategy.getIgnoreEmptyLines() && eol + && (lastChar == '\n' + || lastChar == '\r' + || lastChar == ExtendedBufferedReader.UNDEFINED) + && !isEndOfFile(lastChar)) { + // go on char ahead ... + lastChar = c; + c = in.read(); + eol = isEndOfLine(c); + c = in.readAgain(); + // reached end of file without any content (empty line at the end) + if (isEndOfFile(c)) { + tkn.type = TT_EOF; + return tkn; + } + } + + // did we reach eof during the last iteration already ? TT_EOF + if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) { + tkn.type = TT_EOF; + return tkn; + } + + // important: make sure a new char gets consumed in each iteration + while (!tkn.isReady && tkn.type != TT_EOF) { + // ignore whitespaces at beginning of a token + while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) { + wsBuf.append((char) c); + c = in.read(); + eol = isEndOfLine(c); + } + // ok, start of token reached: comment, encapsulated, or token + if (c == strategy.getCommentStart()) { + // ignore everything till end of line and continue (incr linecount) + in.readLine(); + tkn = nextToken(tkn.reset()); + } else if (c == strategy.getDelimiter()) { + // empty token return TT_TOKEN("") + tkn.type = TT_TOKEN; + tkn.isReady = true; + } else if (eol) { + // empty token return TT_EORECORD("") + //noop: tkn.content.append(""); + tkn.type = TT_EORECORD; + tkn.isReady = true; + } else if (c == strategy.getEncapsulator()) { + // consume encapsulated token + encapsulatedTokenLexer(tkn, c); + } else if (isEndOfFile(c)) { + // end of file return TT_EOF() + //noop: tkn.content.append(""); + tkn.type = TT_EOF; + tkn.isReady = true; + } else { + // next token must be a simple token + // add removed blanks when not ignoring whitespace chars... + if (!strategy.getIgnoreLeadingWhitespaces()) { + tkn.content.append(wsBuf); + } + simpleTokenLexer(tkn, c); + } + } + return tkn; + } + + /** + * A simple token lexer + *

+ * Simple token are tokens which are not surrounded by encapsulators. + * A simple token might contain escaped delimiters (as \, or \;). The + * token is finished when one of the following conditions become true: + *

+ * + * @param tkn the current token + * @param c the current character + * @return the filled token + * @throws IOException on stream access error + */ + private Token simpleTokenLexer(Token tkn, int c) throws IOException { + for (; ;) { + if (isEndOfLine(c)) { + // end of record + tkn.type = TT_EORECORD; + tkn.isReady = true; + break; + } else if (isEndOfFile(c)) { + // end of file + tkn.type = TT_EOF; + tkn.isReady = true; + break; + } else if (c == strategy.getDelimiter()) { + // end of token + tkn.type = TT_TOKEN; + tkn.isReady = true; + break; + } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') { + // interpret unicode escaped chars (like \u0070 -> p) + tkn.content.append((char) unicodeEscapeLexer(c)); + } else if (c == strategy.getEscape()) { + tkn.content.append((char) readEscape(c)); + } else { + tkn.content.append((char) c); + } + + c = in.read(); + } + + if (strategy.getIgnoreTrailingWhitespaces()) { + tkn.content.trimTrailingWhitespace(); + } + + return tkn; + } + + + /** + * An encapsulated token lexer + *

+ * Encapsulated tokens are surrounded by the given encapsulating-string. + * The encapsulator itself might be included in the token using a + * doubling syntax (as "", '') or using escaping (as in \", \'). + * Whitespaces before and after an encapsulated token are ignored. + * + * @param tkn the current token + * @param c the current character + * @return a valid token object + * @throws IOException on invalid state + */ + private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException { + // save current line + int startLineNumber = getLineNumber(); + // ignore the given delimiter + // assert c == delimiter; + for (; ;) { + c = in.read(); + + if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') { + tkn.content.append((char) unicodeEscapeLexer(c)); + } else if (c == strategy.getEscape()) { + tkn.content.append((char) readEscape(c)); + } else if (c == strategy.getEncapsulator()) { + if (in.lookAhead() == strategy.getEncapsulator()) { + // double or escaped encapsulator -> add single encapsulator to token + c = in.read(); + tkn.content.append((char) c); + } else { + // token finish mark (encapsulator) reached: ignore whitespace till delimiter + for (; ;) { + c = in.read(); + if (c == strategy.getDelimiter()) { + tkn.type = TT_TOKEN; + tkn.isReady = true; + return tkn; + } else if (isEndOfFile(c)) { + tkn.type = TT_EOF; + tkn.isReady = true; + return tkn; + } else if (isEndOfLine(c)) { + // ok eo token reached + tkn.type = TT_EORECORD; + tkn.isReady = true; + return tkn; + } else if (!isWhitespace(c)) { + // error invalid char between token and next delimiter + throw new IOException( + "(line " + getLineNumber() + + ") invalid char between encapsulated token end delimiter" + ); + } + } + } + } else if (isEndOfFile(c)) { + // error condition (end of file before end of token) + throw new IOException( + "(startline " + startLineNumber + ")" + + "eof reached before encapsulated token finished" + ); + } else { + // consume character + tkn.content.append((char) c); + } + } + } + + + /** + * Decodes Unicode escapes. + *

+ * Interpretation of "\\uXXXX" escape sequences + * where XXXX is a hex-number. + * + * @param c current char which is discarded because it's the "\\" of "\\uXXXX" + * @return the decoded character + * @throws IOException on wrong unicode escape sequence or read error + */ + protected int unicodeEscapeLexer(int c) throws IOException { + int ret = 0; + // ignore 'u' (assume c==\ now) and read 4 hex digits c = in.read(); - } + code.clear(); + try { + for (int i = 0; i < 4; i++) { + c = in.read(); + if (isEndOfFile(c) || isEndOfLine(c)) { + throw new NumberFormatException("number too short"); + } + code.append((char) c); + } + ret = Integer.parseInt(code.toString(), 16); + } catch (NumberFormatException e) { + throw new IOException( + "(line " + getLineNumber() + ") Wrong unicode escape sequence found '" + + code.toString() + "'" + e.toString()); + } + return ret; + } + + private int readEscape(int c) throws IOException { + // assume c is the escape char (normally a backslash) + c = in.read(); + int out; + switch (c) { + case 'r': + out = '\r'; + break; + case 'n': + out = '\n'; + break; + case 't': + out = '\t'; + break; + case 'b': + out = '\b'; + break; + case 'f': + out = '\f'; + break; + default: + out = c; + } + return out; + } + + // ====================================================== + // strategies + // ====================================================== + + /** + * Obtain the specified CSV Strategy. This should not be modified. + * + * @return strategy currently being used + */ + public CSVStrategy getStrategy() { + return this.strategy; + } + + // ====================================================== + // Character class checker + // ====================================================== + + /** + * @return true if the given char is a whitespace character + */ + private boolean isWhitespace(int c) { + return Character.isWhitespace((char) c) && (c != strategy.getDelimiter()); + } + + /** + * Greedy - accepts \n, \r and \r\n + * This checker consumes silently the second control-character... + * + * @return true if the given character is a line-terminator + */ + private boolean isEndOfLine(int c) throws IOException { + // check if we have \r\n... + if (c == '\r') { + if (in.lookAhead() == '\n') { + // note: does not change c outside of this method !! + c = in.read(); + } + } + return (c == '\n' || c == '\r'); + } + + /** + * @return true if the given character indicates end of file + */ + private boolean isEndOfFile(int c) { + return c == ExtendedBufferedReader.END_OF_STREAM; } - return (c == '\n' || c == '\r'); - } - - /** - * @return true if the given character indicates end of file - */ - private boolean isEndOfFile(int c) { - return c == ExtendedBufferedReader.END_OF_STREAM; - } } diff --git a/src/java/org/apache/commons/csv/CSVPrinter.java b/src/java/org/apache/commons/csv/CSVPrinter.java index 2193c11e..fd2ebc26 100644 --- a/src/java/org/apache/commons/csv/CSVPrinter.java +++ b/src/java/org/apache/commons/csv/CSVPrinter.java @@ -26,282 +26,289 @@ import java.io.Writer; */ public class CSVPrinter { - /** The place that the values get written. */ - protected final Writer out; - protected final CSVStrategy strategy; + /** + * The place that the values get written. + */ + protected final Writer out; + protected final CSVStrategy strategy; - /** True if we just began a new line. */ - protected boolean newLine = true; + /** + * True if we just began a new line. + */ + protected boolean newLine = true; - protected char[] buf = new char[0]; // temporary buffer + protected char[] buf = new char[0]; // temporary buffer - /** - * Create a printer that will print values to the given - * stream following the CSVStrategy. - * - * Currently, only a pure encapsulation strategy or a pure escaping strategy - * is supported. Hybrid strategies (encapsulation and escaping with a different character) are not supported. - * - * @param out stream to which to print. - * @param strategy describes the CSV variation. - */ - public CSVPrinter(Writer out, CSVStrategy strategy) { - this.out = out; - this.strategy = strategy==null ? CSVStrategy.DEFAULT_STRATEGY : strategy; - } - - // ====================================================== - // printing implementation - // ====================================================== - - /** - * Output a blank line - */ - public void println() throws IOException { - out.write(strategy.getPrinterNewline()); - newLine = true; - } - - public void flush() throws IOException { - out.flush(); - } - - - /** - * Print a single line of comma separated values. - * The values will be quoted if needed. Quotes and - * newLine characters will be escaped. - * - * @param values values to be outputted. - */ - public void println(String[] values) throws IOException { - for (int i = 0; i < values.length; i++) { - print(values[i]); - } - println(); - } - - - /** - * Put a comment among the comma separated values. - * Comments will always begin on a new line and occupy a - * least one full line. The character specified to star - * comments and a space will be inserted at the beginning of - * each new line in the comment. - * - * @param comment the comment to output - */ - public void printlnComment(String comment) throws IOException { - if(this.strategy.isCommentingDisabled()) { - return; - } - if (!newLine) { - println(); - } - out.write(this.strategy.getCommentStart()); - out.write(' '); - for (int i = 0; i < comment.length(); i++) { - char c = comment.charAt(i); - switch (c) { - case '\r' : - if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') { - i++; - } - // break intentionally excluded. - case '\n' : - println(); - out.write(this.strategy.getCommentStart()); - out.write(' '); - break; - default : - out.write(c); - break; - } - } - println(); - } - - - public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException { - if (!checkForEscape) { - printSep(); - out.write(value, offset, len); - return; + /** + * Create a printer that will print values to the given + * stream following the CSVStrategy. + *

+ * Currently, only a pure encapsulation strategy or a pure escaping strategy + * is supported. Hybrid strategies (encapsulation and escaping with a different character) are not supported. + * + * @param out stream to which to print. + * @param strategy describes the CSV variation. + */ + public CSVPrinter(Writer out, CSVStrategy strategy) { + this.out = out; + this.strategy = strategy == null ? CSVStrategy.DEFAULT_STRATEGY : strategy; } - if (strategy.getEncapsulator() != CSVStrategy.ENCAPSULATOR_DISABLED) { - printAndEncapsulate(value, offset, len); - } else if (strategy.getEscape() != CSVStrategy.ESCAPE_DISABLED) { - printAndEscape(value, offset, len); - } else { - printSep(); - out.write(value, offset, len); + // ====================================================== + // printing implementation + // ====================================================== + + /** + * Output a blank line + */ + public void println() throws IOException { + out.write(strategy.getPrinterNewline()); + newLine = true; } - } - void printSep() throws IOException { - if (newLine) { - newLine = false; - } else { - out.write(this.strategy.getDelimiter()); + public void flush() throws IOException { + out.flush(); } - } - void printAndEscape(char[] value, int offset, int len) throws IOException { - int start = offset; - int pos = offset; - int end = offset + len; - printSep(); - - char delim = this.strategy.getDelimiter(); - char escape = this.strategy.getEscape(); - - while (pos < end) { - char c = value[pos]; - if (c == '\r' || c=='\n' || c==delim || c==escape) { - // write out segment up until this char - int l = pos-start; - if (l>0) { - out.write(value, start, l); + /** + * Print a single line of comma separated values. + * The values will be quoted if needed. Quotes and + * newLine characters will be escaped. + * + * @param values values to be outputted. + */ + public void println(String[] values) throws IOException { + for (int i = 0; i < values.length; i++) { + print(values[i]); } - if (c=='\n') c='n'; - else if (c=='\r') c='r'; - - out.write(escape); - out.write(c); - - start = pos+1; // start on the current char after this one - } - - pos++; + println(); } - // write last segment - int l = pos-start; - if (l>0) { - out.write(value, start, l); + + /** + * Put a comment among the comma separated values. + * Comments will always begin on a new line and occupy a + * least one full line. The character specified to star + * comments and a space will be inserted at the beginning of + * each new line in the comment. + * + * @param comment the comment to output + */ + public void printlnComment(String comment) throws IOException { + if (this.strategy.isCommentingDisabled()) { + return; + } + if (!newLine) { + println(); + } + out.write(this.strategy.getCommentStart()); + out.write(' '); + for (int i = 0; i < comment.length(); i++) { + char c = comment.charAt(i); + switch (c) { + case '\r': + if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') { + i++; + } + // break intentionally excluded. + case '\n': + println(); + out.write(this.strategy.getCommentStart()); + out.write(' '); + break; + default: + out.write(c); + break; + } + } + println(); } - } - void printAndEncapsulate(char[] value, int offset, int len) throws IOException { - boolean first = newLine; // is this the first value on this line? - boolean quote = false; - int start = offset; - int pos = offset; - int end = offset + len; - printSep(); + public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException { + if (!checkForEscape) { + printSep(); + out.write(value, offset, len); + return; + } - char delim = this.strategy.getDelimiter(); - char encapsulator = this.strategy.getEncapsulator(); + if (strategy.getEncapsulator() != CSVStrategy.ENCAPSULATOR_DISABLED) { + printAndEncapsulate(value, offset, len); + } else if (strategy.getEscape() != CSVStrategy.ESCAPE_DISABLED) { + printAndEscape(value, offset, len); + } else { + printSep(); + out.write(value, offset, len); + } + } - if (len <= 0) { - // always quote an empty token that is the first - // on the line, as it may be the only thing on the - // line. If it were not quoted in that case, - // an empty line has no tokens. - if (first) { - quote = true; - } - } else { - char c = value[pos]; + void printSep() throws IOException { + if (newLine) { + newLine = false; + } else { + out.write(this.strategy.getDelimiter()); + } + } + + void printAndEscape(char[] value, int offset, int len) throws IOException { + int start = offset; + int pos = offset; + int end = offset + len; + + printSep(); + + char delim = this.strategy.getDelimiter(); + char escape = this.strategy.getEscape(); - // Hmmm, where did this rule come from? - if (first - && (c < '0' - || (c > '9' && c < 'A') - || (c > 'Z' && c < 'a') - || (c > 'z'))) { - quote = true; - // } else if (c == ' ' || c == '\f' || c == '\t') { - } else if (c <= '#') { - // Some other chars at the start of a value caused the parser to fail, so for now - // encapsulate if we start in anything less than '#'. We are being conservative - // by including the default comment char too. - quote = true; - } else { while (pos < end) { - c = value[pos]; - if (c=='\n' || c=='\r' || c==encapsulator || c==delim) { - quote = true; - break; - } - pos++; + char c = value[pos]; + if (c == '\r' || c == '\n' || c == delim || c == escape) { + // write out segment up until this char + int l = pos - start; + if (l > 0) { + out.write(value, start, l); + } + if (c == '\n') { + c = 'n'; + } else if (c == '\r') { + c = 'r'; + } + + out.write(escape); + out.write(c); + + start = pos + 1; // start on the current char after this one + } + + pos++; + } + + // write last segment + int l = pos - start; + if (l > 0) { + out.write(value, start, l); + } + } + + void printAndEncapsulate(char[] value, int offset, int len) throws IOException { + boolean first = newLine; // is this the first value on this line? + boolean quote = false; + int start = offset; + int pos = offset; + int end = offset + len; + + printSep(); + + char delim = this.strategy.getDelimiter(); + char encapsulator = this.strategy.getEncapsulator(); + + if (len <= 0) { + // always quote an empty token that is the first + // on the line, as it may be the only thing on the + // line. If it were not quoted in that case, + // an empty line has no tokens. + if (first) { + quote = true; + } + } else { + char c = value[pos]; + + // Hmmm, where did this rule come from? + if (first + && (c < '0' + || (c > '9' && c < 'A') + || (c > 'Z' && c < 'a') + || (c > 'z'))) { + quote = true; + // } else if (c == ' ' || c == '\f' || c == '\t') { + } else if (c <= '#') { + // Some other chars at the start of a value caused the parser to fail, so for now + // encapsulate if we start in anything less than '#'. We are being conservative + // by including the default comment char too. + quote = true; + } else { + while (pos < end) { + c = value[pos]; + if (c == '\n' || c == '\r' || c == encapsulator || c == delim) { + quote = true; + break; + } + pos++; + } + + if (!quote) { + pos = end - 1; + c = value[pos]; + // if (c == ' ' || c == '\f' || c == '\t') { + // Some other chars at the end caused the parser to fail, so for now + // encapsulate if we end in anything less than ' ' + if (c <= ' ') { + quote = true; + } + } + } } if (!quote) { - pos = end-1; - c = value[pos]; - // if (c == ' ' || c == '\f' || c == '\t') { - // Some other chars at the end caused the parser to fail, so for now - // encapsulate if we end in anything less than ' ' - if (c <= ' ') { - quote = true; - } + // no encapsulation needed - write out the original value + out.write(value, offset, len); + return; } - } + + // we hit something that needed encapsulation + out.write(encapsulator); + + // Pick up where we left off: pos should be positioned on the first character that caused + // the need for encapsulation. + while (pos < end) { + char c = value[pos]; + if (c == encapsulator) { + // write out the chunk up until this point + + // add 1 to the length to write out the encapsulator also + out.write(value, start, pos - start + 1); + // put the next starting position on the encapsulator so we will + // write it out again with the next string (effectively doubling it) + start = pos; + } + pos++; + } + + // write the last segment + out.write(value, start, pos - start); + out.write(encapsulator); } - if (!quote) { - // no encapsulation needed - write out the original value - out.write(value, offset, len); - return; + /** + * Print the string as the next value on the line. The value + * will be escaped or encapsulated as needed if checkForEscape==true + * + * @param value value to be outputted. + */ + public void print(String value, boolean checkForEscape) throws IOException { + if (!checkForEscape) { + // write directly from string + printSep(); + out.write(value); + return; + } + + if (buf.length < value.length()) { + buf = new char[value.length()]; + } + + value.getChars(0, value.length(), buf, 0); + print(buf, 0, value.length(), checkForEscape); } - // we hit something that needed encapsulation - out.write(encapsulator); - - // Pick up where we left off: pos should be positioned on the first character that caused - // the need for encapsulation. - while (posCSVUtils instances should NOT be constructed in - * standard programming. + * standard programming. * *

This constructor is public to permit tools that require a JavaBean * instance to operate.

*/ public CSVUtils() { } - + /** * Converts an array of string values into a single CSV line. All * null values are converted to the string "null", @@ -46,13 +46,13 @@ public class CSVUtils { * * @param values the value array * @return the CSV string, will be an empty string if the length of the - * value array is 0 + * value array is 0 */ public static String printLine(String[] values, CSVStrategy strategy) { // set up a CSVUtils StringWriter stringWriter = new StringWriter(); CSVPrinter csvPrinter = new CSVPrinter(stringWriter, strategy); - + // check for null values an "null" as strings and convert them // into the strings "null" and "\"null\"" for (int i = 0; i < values.length; i++) { @@ -62,60 +62,60 @@ public class CSVUtils { values[i] = "\"null\""; } } - + // convert to CSV try { - csvPrinter.println(values); + csvPrinter.println(values); } catch (IOException e) { - // should not happen with StringWriter + // should not happen with StringWriter } // as the resulting string has \r\n at the end, we will trim that away return stringWriter.toString().trim(); } - - // ====================================================== - // static parsers - // ====================================================== - - /** - * Parses the given String according to the default {@link CSVStrategy}. - * - * @param s CSV String to be parsed. - * @return parsed String matrix (which is never null) - * @throws IOException in case of error - */ - public static String[][] parse(String s) throws IOException { - if (s == null) { - throw new IllegalArgumentException("Null argument not allowed."); + + // ====================================================== + // static parsers + // ====================================================== + + /** + * Parses the given String according to the default {@link CSVStrategy}. + * + * @param s CSV String to be parsed. + * @return parsed String matrix (which is never null) + * @throws IOException in case of error + */ + public static String[][] parse(String s) throws IOException { + if (s == null) { + throw new IllegalArgumentException("Null argument not allowed."); + } + String[][] result = (new CSVParser(new StringReader(s))).getAllValues(); + if (result == null) { + // since CSVStrategy ignores empty lines an empty array is returned + // (i.e. not "result = new String[][] {{""}};") + result = EMPTY_DOUBLE_STRING_ARRAY; + } + return result; } - String[][] result = (new CSVParser(new StringReader(s))).getAllValues(); - if (result == null) { - // since CSVStrategy ignores empty lines an empty array is returned - // (i.e. not "result = new String[][] {{""}};") - result = EMPTY_DOUBLE_STRING_ARRAY; + + /** + * Parses the first line only according to the default {@link CSVStrategy}. + * + * Parsing empty string will be handled as valid records containing zero + * elements, so the following property holds: parseLine("").length == 0. + * + * @param s CSV String to be parsed. + * @return parsed String vector (which is never null) + * @throws IOException in case of error + */ + public static String[] parseLine(String s) throws IOException { + if (s == null) { + throw new IllegalArgumentException("Null argument not allowed."); + } + // uh,jh: make sure that parseLine("").length == 0 + if (s.length() == 0) { + return EMPTY_STRING_ARRAY; + } + return (new CSVParser(new StringReader(s))).getLine(); } - return result; - } - - /** - * Parses the first line only according to the default {@link CSVStrategy}. - * - * Parsing empty string will be handled as valid records containing zero - * elements, so the following property holds: parseLine("").length == 0. - * - * @param s CSV String to be parsed. - * @return parsed String vector (which is never null) - * @throws IOException in case of error - */ - public static String[] parseLine(String s) throws IOException { - if (s == null) { - throw new IllegalArgumentException("Null argument not allowed."); - } - // uh,jh: make sure that parseLine("").length == 0 - if (s.length() == 0) { - return EMPTY_STRING_ARRAY; - } - return (new CSVParser(new StringReader(s))).getLine(); - } - + } diff --git a/src/java/org/apache/commons/csv/CharBuffer.java b/src/java/org/apache/commons/csv/CharBuffer.java index d1ccc801..94bffebb 100644 --- a/src/java/org/apache/commons/csv/CharBuffer.java +++ b/src/java/org/apache/commons/csv/CharBuffer.java @@ -19,11 +19,11 @@ package org.apache.commons.csv; /** - * A simple StringBuffer replacement that aims to + * A simple StringBuffer replacement that aims to * reduce copying as much as possible. The buffer * grows as necessary. * This class is not thread safe. - * + * * @author Ortwin Glïż½ck */ public class CharBuffer { @@ -31,21 +31,21 @@ public class CharBuffer { private char[] c; /** - * Actually used number of characters in the array. + * Actually used number of characters in the array. * It is also the index at which - * a new character will be inserted into c. - */ + * a new character will be inserted into c. + */ private int length; - + /** * Creates a new CharBuffer with an initial capacity of 32 characters. */ public CharBuffer() { this(32); } - + /** - * Creates a new CharBuffer with an initial capacity + * Creates a new CharBuffer with an initial capacity * of length characters. */ public CharBuffer(final int length) { @@ -54,16 +54,17 @@ public class CharBuffer { } this.c = new char[length]; } - + /** * Empties the buffer. The capacity still remains the same, so no memory is freed. */ public void clear() { length = 0; } - + /** * Returns the number of characters in the buffer. + * * @return the number of characters */ public int length() { @@ -72,16 +73,18 @@ public class CharBuffer { /** * Returns the current capacity of the buffer. + * * @return the maximum number of characters that can be stored in this buffer without - * resizing it. + * resizing it. */ public int capacity() { return c.length; } - + /** * Appends the contents of cb to the end of this CharBuffer. + * * @param cb the CharBuffer to append or null */ public void append(final CharBuffer cb) { @@ -92,10 +95,11 @@ public class CharBuffer { System.arraycopy(cb.c, 0, c, length, cb.length); length += cb.length; } - + /** * Appends s to the end of this CharBuffer. * This method involves copying the new data once! + * * @param s the String to append or null */ public void append(final String s) { @@ -104,10 +108,11 @@ public class CharBuffer { } append(s.toCharArray()); } - + /** * Appends sb to the end of this CharBuffer. * This method involves copying the new data once! + * * @param sb the StringBuffer to append or null */ public void append(final StringBuffer sb) { @@ -118,10 +123,11 @@ public class CharBuffer { sb.getChars(0, sb.length(), c, length); length += sb.length(); } - + /** * Appends data to the end of this CharBuffer. * This method involves copying the new data once! + * * @param data the char[] to append or null */ public void append(final char[] data) { @@ -132,10 +138,11 @@ public class CharBuffer { System.arraycopy(data, 0, c, length, data.length); length += data.length; } - + /** * Appends a single character to the end of this CharBuffer. * This method involves copying the new data once! + * * @param data the char to append */ public void append(final char data) { @@ -143,7 +150,7 @@ public class CharBuffer { c[length] = data; length++; } - + /** * Shrinks the capacity of the buffer to the current length if necessary. * This method involves copying the data once! @@ -157,13 +164,13 @@ public class CharBuffer { c = newc; } - /** - * Removes trailing whitespace. - */ + /** + * Removes trailing whitespace. + */ public void trimTrailingWhitespace() { - while (length>0 && Character.isWhitespace(c[length-1])) { - length--; - } + while (length > 0 && Character.isWhitespace(c[length - 1])) { + length--; + } } /** @@ -172,6 +179,7 @@ public class CharBuffer { * modifying it. * This method allows to avoid copying if the caller knows the exact capacity * before. + * * @return */ public char[] getCharacters() { @@ -183,16 +191,17 @@ public class CharBuffer { return chars; } - /** - * Returns the character at the specified position. - */ + /** + * Returns the character at the specified position. + */ public char charAt(int pos) { - return c[pos]; - } + return c[pos]; + } /** * Converts the contents of the buffer into a StringBuffer. * This method involves copying the new data once! + * * @return */ public StringBuffer toStringBuffer() { @@ -200,25 +209,27 @@ public class CharBuffer { sb.append(c, 0, length); return sb; } - + /** * Converts the contents of the buffer into a StringBuffer. * This method involves copying the new data once! + * * @return */ public String toString() { return new String(c, 0, length); } - + /** * Copies the data into a new array of at least capacity size. + * * @param capacity */ public void provideCapacity(final int capacity) { if (c.length >= capacity) { return; } - int newcapacity = ((capacity*3)>>1) + 1; + int newcapacity = ((capacity * 3) >> 1) + 1; char[] newc = new char[newcapacity]; System.arraycopy(c, 0, newc, 0, length); c = newc; diff --git a/src/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/java/org/apache/commons/csv/ExtendedBufferedReader.java index 1b60f155..0323856a 100644 --- a/src/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ b/src/java/org/apache/commons/csv/ExtendedBufferedReader.java @@ -23,214 +23,223 @@ import java.io.Reader; /** * ExtendedBufferedReader * - * A special reader decorater which supports more + * A special reader decorator which supports more * sophisticated access to the underlying reader object. - * + * * In particular the reader supports a look-ahead option, * which allows you to see the next char returned by * next(). - * */ -class ExtendedBufferedReader extends BufferedReader { +class ExtendedBufferedReader extends BufferedReader { - - /** the end of stream symbol */ - public static final int END_OF_STREAM = -1; - /** undefined state for the lookahead char */ - public static final int UNDEFINED = -2; - - /** the lookahead chars */ - private int lookaheadChar = UNDEFINED; - /** the last char returned */ - private int lastChar = UNDEFINED; - /** the line counter */ - private int lineCounter = 0; - private CharBuffer line = new CharBuffer(); - - /** - * Created extended buffered reader using default buffer-size - * - */ - public ExtendedBufferedReader(Reader r) { - super(r); - /* note uh: do not fetch the first char here, - * because this might block the method! + + /** + * the end of stream symbol */ - } - - /** - * Create extended buffered reader using the given buffer-size - */ - public ExtendedBufferedReader(Reader r, int bufSize) { - super(r, bufSize); - /* note uh: do not fetch the first char here, - * because this might block the method! + public static final int END_OF_STREAM = -1; + /** + * undefined state for the lookahead char */ - } - - /** - * Reads the next char from the input stream. - * @return the next char or END_OF_STREAM if end of stream has been reached. - */ - public int read() throws IOException { - // initalize the lookahead - if (lookaheadChar == UNDEFINED) { - lookaheadChar = super.read(); + public static final int UNDEFINED = -2; + + /** + * the lookahead chars + */ + private int lookaheadChar = UNDEFINED; + /** + * the last char returned + */ + private int lastChar = UNDEFINED; + /** + * the line counter + */ + private int lineCounter = 0; + private CharBuffer line = new CharBuffer(); + + /** + * Created extended buffered reader using default buffer-size + */ + public ExtendedBufferedReader(Reader r) { + super(r); + /* note uh: do not fetch the first char here, + * because this might block the method! + */ } - lastChar = lookaheadChar; - if (super.ready()) { - lookaheadChar = super.read(); - } else { - lookaheadChar = UNDEFINED; + + /** + * Create extended buffered reader using the given buffer-size + */ + public ExtendedBufferedReader(Reader r, int bufSize) { + super(r, bufSize); + /* note uh: do not fetch the first char here, + * because this might block the method! + */ } - if (lastChar == '\n') { - lineCounter++; - } - return lastChar; - } - - /** - * Returns the last read character again. - * - * @return the last read char or UNDEFINED - */ - public int readAgain() { - return lastChar; - } - - /** - * Non-blocking reading of len chars into buffer buf starting - * at bufferposition off. - * - * performs an iteratative read on the underlying stream - * as long as the following conditions hold: - * - less than len chars have been read - * - end of stream has not been reached - * - next read is not blocking - * - * @return nof chars actually read or END_OF_STREAM - */ - public int read(char[] buf, int off, int len) throws IOException { - // do not claim if len == 0 - if (len == 0) { - return 0; - } - - // init lookahead, but do not block !! - if (lookaheadChar == UNDEFINED) { - if (ready()) { - lookaheadChar = super.read(); + + /** + * Reads the next char from the input stream. + * + * @return the next char or END_OF_STREAM if end of stream has been reached. + */ + public int read() throws IOException { + // initialize the lookahead + if (lookaheadChar == UNDEFINED) { + lookaheadChar = super.read(); + } + lastChar = lookaheadChar; + if (super.ready()) { + lookaheadChar = super.read(); } else { - return -1; + lookaheadChar = UNDEFINED; + } + if (lastChar == '\n') { + lineCounter++; + } + return lastChar; + } + + /** + * Returns the last read character again. + * + * @return the last read char or UNDEFINED + */ + public int readAgain() { + return lastChar; + } + + /** + * Non-blocking reading of len chars into buffer buf starting + * at bufferposition off. + *

+ * performs an iterative read on the underlying stream + * as long as the following conditions hold: + * - less than len chars have been read + * - end of stream has not been reached + * - next read is not blocking + * + * @return nof chars actually read or END_OF_STREAM + */ + public int read(char[] buf, int off, int len) throws IOException { + // do not claim if len == 0 + if (len == 0) { + return 0; + } + + // init lookahead, but do not block !! + if (lookaheadChar == UNDEFINED) { + if (ready()) { + lookaheadChar = super.read(); + } else { + return -1; + } + } + // 'first read of underlying stream' + if (lookaheadChar == -1) { + return -1; + } + // continue until the lookaheadChar would block + int cOff = off; + while (len > 0 && ready()) { + if (lookaheadChar == -1) { + // eof stream reached, do not continue + return cOff - off; + } else { + buf[cOff++] = (char) lookaheadChar; + if (lookaheadChar == '\n') { + lineCounter++; + } + lastChar = lookaheadChar; + lookaheadChar = super.read(); + len--; + } + } + return cOff - off; + } + + /** + * @return A String containing the contents of the line, not + * including any line-termination characters, or null + * if the end of the stream has been reached + */ + public String readLine() throws IOException { + + if (lookaheadChar == UNDEFINED) { + lookaheadChar = super.read(); + } + + line.clear(); //reuse + + // return null if end of stream has been reached + if (lookaheadChar == END_OF_STREAM) { + return null; + } + // do we have a line termination already + char laChar = (char) lookaheadChar; + if (laChar == '\n' || laChar == '\r') { + lastChar = lookaheadChar; + lookaheadChar = super.read(); + // ignore '\r\n' as well + if ((char) lookaheadChar == '\n') { + lastChar = lookaheadChar; + lookaheadChar = super.read(); + } + lineCounter++; + return line.toString(); + } + + // create the rest-of-line return and update the lookahead + line.append(laChar); + String restOfLine = super.readLine(); // TODO involves copying + lastChar = lookaheadChar; + lookaheadChar = super.read(); + if (restOfLine != null) { + line.append(restOfLine); + } + lineCounter++; + return line.toString(); + } + + /** + * Unsupported + */ + public long skip(long n) throws IllegalArgumentException, IOException { + throw new UnsupportedOperationException("CSV has no reason to implement this"); + } + + /** + * Returns the next char in the stream without consuming it. + * + * Remember the next char read by read(..) will always be + * identical to lookAhead(). + * + * @return the next char (without consuming it) or END_OF_STREAM + */ + public int lookAhead() throws IOException { + if (lookaheadChar == UNDEFINED) { + lookaheadChar = super.read(); + } + return lookaheadChar; + } + + + /** + * Returns the nof line read + * + * @return the current-line-number (or -1) + */ + public int getLineNumber() { + if (lineCounter > -1) { + return lineCounter; + } else { + return -1; } } - // 'first read of underlying stream' - if (lookaheadChar == -1) { - return -1; - } - // continue until the lookaheadChar would block - int cOff = off; - while (len > 0 && ready()) { - if (lookaheadChar == -1) { - // eof stream reached, do not continue - return cOff - off; - } else { - buf[cOff++] = (char) lookaheadChar; - if (lookaheadChar == '\n') { - lineCounter++; - } - lastChar = lookaheadChar; - lookaheadChar = super.read(); - len--; - } - } - return cOff - off; - } - - /** - * @return A String containing the contents of the line, not - * including any line-termination characters, or null - * if the end of the stream has been reached - */ - public String readLine() throws IOException { - - if (lookaheadChar == UNDEFINED) { - lookaheadChar = super.read(); - } - - line.clear(); //reuse - - // return null if end of stream has been reached - if (lookaheadChar == END_OF_STREAM) { - return null; - } - // do we have a line termination already - char laChar = (char) lookaheadChar; - if (laChar == '\n' || laChar == '\r') { - lastChar = lookaheadChar; - lookaheadChar = super.read(); - // ignore '\r\n' as well - if ((char) lookaheadChar == '\n') { - lastChar = lookaheadChar; - lookaheadChar = super.read(); - } - lineCounter++; - return line.toString(); - } - - // create the rest-of-line return and update the lookahead - line.append(laChar); - String restOfLine = super.readLine(); // TODO involves copying - lastChar = lookaheadChar; - lookaheadChar = super.read(); - if (restOfLine != null) { - line.append(restOfLine); - } - lineCounter++; - return line.toString(); - } - - /** - * Unsupported - */ - public long skip(long n) throws IllegalArgumentException, IOException { - throw new UnsupportedOperationException("CSV has no reason to implement this"); - } - - /** - * Returns the next char in the stream without consuming it. - * - * Remember the next char read by read(..) will always be - * identical to lookAhead(). - * - * @return the next char (without consuming it) or END_OF_STREAM - */ - public int lookAhead() throws IOException { - if (lookaheadChar == UNDEFINED) { - lookaheadChar = super.read(); - } - return lookaheadChar; - } - - - /** - * Returns the nof line read - * - * @return the current-line-number (or -1) - */ - public int getLineNumber() { - if (lineCounter > -1) { - return lineCounter; - } else { - return -1; - } - } - /** - * Unsupported - */ - public boolean markSupported() { - throw new UnsupportedOperationException("CSV has no reason to implement this"); - } - + /** + * Unsupported + */ + public boolean markSupported() { + throw new UnsupportedOperationException("CSV has no reason to implement this"); + } + } diff --git a/src/java/org/apache/commons/csv/writer/CSVConfig.java b/src/java/org/apache/commons/csv/writer/CSVConfig.java index ffcc62a1..74c816c7 100644 --- a/src/java/org/apache/commons/csv/writer/CSVConfig.java +++ b/src/java/org/apache/commons/csv/writer/CSVConfig.java @@ -32,69 +32,100 @@ import java.util.List; */ public class CSVConfig { - /** specifies if it is a fixed width csv file **/ + /** + * specifies if it is a fixed width csv file * + */ private boolean fixedWidth; - /** list of fields **/ + /** + * list of fields * + */ private List fields; - /** Do no do any filling **/ - public static final int FILLNONE = 0; - /** Fill content the the left. Mainly usable together with fixedWidth **/ - public static final int FILLLEFT = 1; - /** Fill content to the right. Mainly usable together with fixedWidth **/ - public static final int FILLRIGHT = 2; - - /** The fill pattern */ - private int fill; - /** The fill char. Defaults to a space */ - private char fillChar = ' '; - /** The seperator character. Defaults to , */ - private char delimiter = ','; - /** The row separator. Defaults to \n */ - private String rowDelimiter = "\n"; - /** Should we ignore the delimiter. Defaults to false */ - private boolean ignoreDelimiter = false; - /** the value delimiter. Defaults to " */ - private char valueDelimiter = '"'; - /** Should we ignore the value delimiter. Defaults to true */ - private boolean ignoreValueDelimiter = true; - /** Specifies if we want to use a field header */ - private boolean fieldHeader = false; - /** Specifies if the end of the line needs to be trimmed */ - private boolean endTrimmed = false; /** - * + * Do no do any filling * + */ + public static final int FILLNONE = 0; + /** + * Fill content the the left. Mainly usable together with fixedWidth * + */ + public static final int FILLLEFT = 1; + /** + * Fill content to the right. Mainly usable together with fixedWidth * + */ + public static final int FILLRIGHT = 2; + + /** + * The fill pattern + */ + private int fill; + /** + * The fill char. Defaults to a space + */ + private char fillChar = ' '; + /** + * The seperator character. Defaults to , + */ + private char delimiter = ','; + /** + * The row separator. Defaults to \n + */ + private String rowDelimiter = "\n"; + /** + * Should we ignore the delimiter. Defaults to false + */ + private boolean ignoreDelimiter = false; + /** + * the value delimiter. Defaults to " + */ + private char valueDelimiter = '"'; + /** + * Should we ignore the value delimiter. Defaults to true + */ + private boolean ignoreValueDelimiter = true; + /** + * Specifies if we want to use a field header + */ + private boolean fieldHeader = false; + /** + * Specifies if the end of the line needs to be trimmed + */ + private boolean endTrimmed = false; + + /** + * */ public CSVConfig() { super(); } - + /** * @return if the CSV file is fixedWidth */ public boolean isFixedWidth() { return fixedWidth; } - + /** * Specify if the CSV file is fixed width. * Defaults to false + * * @param fixedWidth the fixedwidth */ public void setFixedWidth(boolean fixedWidth) { this.fixedWidth = fixedWidth; } - + public void addField(CSVField field) { if (fields == null) { fields = new ArrayList(); } fields.add(field); } - + /** * Set the fields that should be used by the writer. * This will overwrite currently added fields completely! + * * @param csvFields the csvfields array. If null it will do nothing */ public void setFields(CSVField[] csvFields) { @@ -103,9 +134,10 @@ public class CSVConfig { } fields = new ArrayList(Arrays.asList(csvFields)); } - + /** * Set the fields that should be used by the writer + * * @param csvField a collection with fields. If null it will do nothing */ public void setFields(Collection csvField) { @@ -125,12 +157,12 @@ public class CSVConfig { } return csvFields; } - + public CSVField getField(String name) { if (fields == null || name == null) { return null; } - for(int i = 0; i < fields.size(); i++) { + for (int i = 0; i < fields.size(); i++) { CSVField field = (CSVField) fields.get(i); if (name.equals(field.getName())) { return field; @@ -149,6 +181,7 @@ public class CSVConfig { /** * Set the fill pattern. Defaults to {@link #FILLNONE} *
Other options are : {@link #FILLLEFT} and {@link #FILLRIGHT} + * * @param fill the fill pattern. */ public void setFill(int fill) { @@ -156,7 +189,6 @@ public class CSVConfig { } /** - * * @return the fillchar. Defaults to a space. */ public char getFillChar() { @@ -165,6 +197,7 @@ public class CSVConfig { /** * Set the fill char + * * @param fillChar the fill char */ public void setFillChar(char fillChar) { @@ -180,6 +213,7 @@ public class CSVConfig { /** * Set the delimiter to use + * * @param delimiter the delimiter character. */ public void setDelimiter(char delimiter) { @@ -195,6 +229,7 @@ public class CSVConfig { /** * Set the rowDelimiter to use + * * @param rowDelimiter the row delimiter character. */ public void setRowDelimiter(String rowDelimiter) { @@ -209,7 +244,8 @@ public class CSVConfig { } /** - * Specify if the writer should ignore the delimiter. + * Specify if the writer should ignore the delimiter. + * * @param ignoreDelimiter defaults to false. */ public void setIgnoreDelimiter(boolean ignoreDelimiter) { @@ -225,6 +261,7 @@ public class CSVConfig { /** * Set the value delimiter to use + * * @param valueDelimiter the value delimiter character. */ public void setValueDelimiter(char valueDelimiter) { @@ -240,7 +277,8 @@ public class CSVConfig { } /** - * Specify if the writer should ignore the value delimiter. + * Specify if the writer should ignore the value delimiter. + * * @param ignoreValueDelimiter defaults to false. */ public void setIgnoreValueDelimiter(boolean ignoreValueDelimiter) { @@ -253,16 +291,19 @@ public class CSVConfig { public boolean isFieldHeader() { return fieldHeader; } + /** * Specify if you want to use a field header. + * * @param fieldHeader true or false. */ public void setFieldHeader(boolean fieldHeader) { this.fieldHeader = fieldHeader; } - + /** * TODO.. + * * @see java.lang.Object#equals(java.lang.Object) */ public boolean equals(Object obj) { @@ -278,8 +319,9 @@ public class CSVConfig { /** * Creates a config based on a stream. It tries to guess
* NOTE : The stream will be closed. - * @param inputStream the inputstream. - * @return the guessed config. + * + * @param inputStream the inputstream. + * @return the guessed config. */ public static CSVConfig guessConfig(InputStream inputStream) { return null; @@ -294,11 +336,12 @@ public class CSVConfig { /** * Specify if the end of the line needs to be trimmed. Defaults to false. + * * @param endTrimmed */ public void setEndTrimmed(boolean endTrimmed) { this.endTrimmed = endTrimmed; } - + } diff --git a/src/java/org/apache/commons/csv/writer/CSVConfigGuesser.java b/src/java/org/apache/commons/csv/writer/CSVConfigGuesser.java index e811f8b7..2b30686b 100644 --- a/src/java/org/apache/commons/csv/writer/CSVConfigGuesser.java +++ b/src/java/org/apache/commons/csv/writer/CSVConfigGuesser.java @@ -30,23 +30,27 @@ import java.io.InputStreamReader; */ public class CSVConfigGuesser { - /** The stream to read */ + /** + * The stream to read + */ private InputStream in; - /** + /** * if the file has a field header (need this info, to be able to guess better) * Defaults to false */ private boolean hasFieldHeader = false; - /** The found config */ - protected CSVConfig config; - /** - * + * The found config + */ + protected CSVConfig config; + + /** + * */ public CSVConfigGuesser() { this.config = new CSVConfig(); } - + /** * @param in the inputstream to guess from */ @@ -54,23 +58,24 @@ public class CSVConfigGuesser { this(); setInputStream(in); } - + public void setInputStream(InputStream in) { this.in = in; } - + /** * Allow override. + * * @return the inputstream that was set. */ protected InputStream getInputStream() { return in; } - + /** - * Guess the config based on the first 10 (or less when less available) + * Guess the config based on the first 10 (or less when less available) * records of a CSV file. - * + * * @return the guessed config. */ public CSVConfig guess() { @@ -80,7 +85,7 @@ public class CSVConfigGuesser { String[] lines = new String[10]; String line = null; int counter = 0; - while ( (line = bIn.readLine()) != null && counter <= 10) { + while ((line = bIn.readLine()) != null && counter <= 10) { lines[counter] = line; counter++; } @@ -91,13 +96,13 @@ public class CSVConfigGuesser { lines = newLines; } analyseLines(lines); - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); } finally { if (in != null) { try { in.close(); - } catch(Exception e) { + } catch (Exception e) { // ignore exception. } } @@ -107,15 +112,16 @@ public class CSVConfigGuesser { config = null; return conf; } - + protected void analyseLines(String[] lines) { guessFixedWidth(lines); guessFieldSeperator(lines); } - + /** * Guess if this file is fixedwidth. * Just basing the fact on all lines being of the same length + * * @param lines */ protected void guessFixedWidth(String[] lines) { @@ -132,7 +138,7 @@ public class CSVConfigGuesser { } } } - + protected void guessFieldSeperator(String[] lines) { if (config.isFixedWidth()) { @@ -142,7 +148,7 @@ public class CSVConfigGuesser { for (int i = 0; i < lines.length; i++) { } } - + protected void guessFixedWidthSeperator(String[] lines) { // keep track of the fieldlength int previousMatch = -1; @@ -156,21 +162,21 @@ public class CSVConfigGuesser { if (last != lines[j].charAt(i)) { charMatches = false; break; - } + } } if (charMatches) { if (previousMatch == -1) { previousMatch = 0; } CSVField field = new CSVField(); - field.setName("field"+config.getFields().length+1); - field.setSize((i-previousMatch)); + field.setName("field" + config.getFields().length + 1); + field.setSize((i - previousMatch)); config.addField(field); } } } + /** - * * @return if the field uses a field header. Defaults to false. */ public boolean hasFieldHeader() { @@ -179,11 +185,12 @@ public class CSVConfigGuesser { /** * Specify if the CSV file has a field header + * * @param hasFieldHeader true or false */ public void setHasFieldHeader(boolean hasFieldHeader) { this.hasFieldHeader = hasFieldHeader; } - - + + } diff --git a/src/java/org/apache/commons/csv/writer/CSVField.java b/src/java/org/apache/commons/csv/writer/CSVField.java index 3b67a42d..45936675 100644 --- a/src/java/org/apache/commons/csv/writer/CSVField.java +++ b/src/java/org/apache/commons/csv/writer/CSVField.java @@ -20,7 +20,6 @@ package org.apache.commons.csv.writer; /** - * * @author Martin van den Bemt * @version $Id: $ */ @@ -32,7 +31,7 @@ public class CSVField { private boolean overrideFill; /** - * + * */ public CSVField() { } @@ -59,9 +58,10 @@ public class CSVField { public String getName() { return name; } - + /** * Set the name of the field + * * @param name the name */ public void setName(String name) { @@ -69,7 +69,6 @@ public class CSVField { } /** - * * @return the size of the field */ public int getSize() { @@ -79,6 +78,7 @@ public class CSVField { /** * Set the size of the field. * The size will be ignored when fixedwidth is set to false in the CSVConfig + * * @param size the size of the field. */ public void setSize(int size) { @@ -94,16 +94,17 @@ public class CSVField { /** * Sets overrideFill to true. + * * @param fill the file pattern */ public void setFill(int fill) { overrideFill = true; this.fill = fill; } - + /** * Does this field override fill ? - * + * * @return */ public boolean overrideFill() { diff --git a/src/java/org/apache/commons/csv/writer/CSVWriter.java b/src/java/org/apache/commons/csv/writer/CSVWriter.java index 5ab7dfc6..ce79ca16 100644 --- a/src/java/org/apache/commons/csv/writer/CSVWriter.java +++ b/src/java/org/apache/commons/csv/writer/CSVWriter.java @@ -31,16 +31,21 @@ import java.util.Map; */ public class CSVWriter { - /** The CSV config **/ - private CSVConfig config; - /** The writer **/ - private Writer writer; /** - * + * The CSV config * + */ + private CSVConfig config; + /** + * The writer * + */ + private Writer writer; + + /** + * */ public CSVWriter() { } - + public CSVWriter(CSVConfig config) { setConfig(config); } @@ -56,12 +61,12 @@ public class CSVWriter { value = writeValue(fields[i], value); sb.append(value); } - if (!config.isDelimiterIgnored() && fields.length != (i+1)) { + if (!config.isDelimiterIgnored() && fields.length != (i + 1)) { sb.append(config.getDelimiter()); } } if (config.isEndTrimmed()) { - for (int i = sb.length()-1; i >= 0; i--) { + for (int i = sb.length() - 1; i >= 0; i--) { System.out.println("i : " + i); if (Character.isWhitespace(sb.charAt(i))) { sb.deleteCharAt(i); @@ -73,11 +78,11 @@ public class CSVWriter { sb.append(config.getRowDelimiter()); String line = sb.toString(); writer.write(line); - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); } } - + protected String writeValue(CSVField field, String value) throws Exception { if (config.isFixedWidth()) { if (value.length() < field.getSize()) { @@ -106,11 +111,11 @@ public class CSVWriter { } if (!config.isValueDelimiterIgnored()) { // add the value delimiter.. - value = config.getValueDelimiter()+value+config.getValueDelimiter(); + value = config.getValueDelimiter() + value + config.getValueDelimiter(); } return value; } - + /** * @return the CVSConfig or null if not present */ @@ -120,14 +125,16 @@ public class CSVWriter { /** * Set the CSVConfig + * * @param config the CVSConfig */ public void setConfig(CSVConfig config) { this.config = config; } - + /** * Set the writer to write the CSV file to. + * * @param writer the writer. */ public void setWriter(Writer writer) { diff --git a/src/test/org/apache/commons/csv/CSVParserTest.java b/src/test/org/apache/commons/csv/CSVParserTest.java index 8212c2b3..ecee9c7e 100644 --- a/src/test/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/org/apache/commons/csv/CSVParserTest.java @@ -27,569 +27,573 @@ import junit.framework.TestCase; * CSVParserTest * * The test are organized in three different sections: - * The 'setter/getter' section, the lexer section and finally the parser - * section. In case a test fails, you should follow a top-down approach for + * The 'setter/getter' section, the lexer section and finally the parser + * section. In case a test fails, you should follow a top-down approach for * fixing a potential bug (its likely that the parser itself fails if the lexer * has problems...). */ public class CSVParserTest extends TestCase { - - /** - * TestCSVParser. - */ - class TestCSVParser extends CSVParser { + /** - * Test parser to investigate the type of the internal Token. - * @param in a Reader + * TestCSVParser. */ - TestCSVParser(Reader in) { - super(in); + class TestCSVParser extends CSVParser { + /** + * Test parser to investigate the type of the internal Token. + * + * @param in a Reader + */ + TestCSVParser(Reader in) { + super(in); + } + + TestCSVParser(Reader in, CSVStrategy strategy) { + super(in, strategy); + } + + /** + * Calls super.nextToken() and prints out a String representation of token + * type and content. + * + * @return String representation of token type and content + * @throws IOException like {@link CSVParser#nextToken()} + */ + public String testNextToken() throws IOException { + Token t = super.nextToken(); + return Integer.toString(t.type) + ";" + t.content + ";"; + } } - TestCSVParser(Reader in, CSVStrategy strategy) { - super(in, strategy); + // ====================================================== + // lexer tests + // ====================================================== + + // Single line (without comment) + public void testNextToken1() throws IOException { + String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,"; + TestCSVParser parser = new TestCSVParser(new StringReader(code)); + assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";lmnop;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";qrst;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";uv;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";wxy;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";z;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken()); + assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken()); } - /** - * Calls super.nextToken() and prints out a String representation of token - * type and content. - * @return String representation of token type and content - * @throws IOException like {@link CSVParser#nextToken()} - */ - public String testNextToken() throws IOException { - Token t = super.nextToken(); - return Integer.toString(t.type) + ";" + t.content + ";"; + + // multiline including comments (and empty lines) + public void testNextToken2() throws IOException { + /* file: 1,2,3, + * a,b x,c + * + * # this is a comment + * d,e, + * + */ + String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n"; + CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone(); + // strategy.setIgnoreEmptyLines(false); + strategy.setCommentStart('#'); + + TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy); + + + assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken()); + assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";b x;", parser.testNextToken()); + assertEquals(CSVParser.TT_EORECORD + ";c;", parser.testNextToken()); + assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";d;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";e;", parser.testNextToken()); + assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken()); + assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken()); + assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken()); + } - } - - // ====================================================== - // lexer tests - // ====================================================== - - // Single line (without comment) - public void testNextToken1() throws IOException { - String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,"; - TestCSVParser parser = new TestCSVParser(new StringReader(code)); - assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";lmnop;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";qrst;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";uv;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";wxy;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";z;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken()); - assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken()); - } - - // multiline including comments (and empty lines) - public void testNextToken2() throws IOException { - /* file: 1,2,3, - * a,b x,c - * - * # this is a comment - * d,e, - * - */ - String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n"; - CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone(); - // strategy.setIgnoreEmptyLines(false); - strategy.setCommentStart('#'); - TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy); + // simple token with escaping + public void testNextToken3() throws IOException { + /* file: a,\,,b + * \,, + */ + String code = "a,\\,,b\n\\,,"; + CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone(); + strategy.setCommentStart('#'); + TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy); + assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); + // an unquoted single backslash is not an escape char + assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken()); + assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); + // an unquoted single backslash is not an escape char + assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken()); + assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken()); + } - assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken()); - assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";b x;", parser.testNextToken()); - assertEquals(CSVParser.TT_EORECORD + ";c;", parser.testNextToken()); - assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";d;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";e;", parser.testNextToken()); - assertEquals(CSVParser.TT_EORECORD + ";;", parser.testNextToken()); - assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken()); - assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken()); - - } - - // simple token with escaping - public void testNextToken3() throws IOException { - /* file: a,\,,b - * \,, - */ - String code = "a,\\,,b\n\\,,"; - CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone(); - strategy.setCommentStart('#'); - TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy); - - assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); - // an unquoted single backslash is not an escape char - assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken()); - assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); - // an unquoted single backslash is not an escape char - assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken()); - assertEquals(CSVParser.TT_EOF + ";;", parser.testNextToken()); - } - - // encapsulator tokenizer (sinle line) - public void testNextToken4() throws IOException { - /* file: a,"foo",b - * a, " foo",b - * a,"foo " ,b // whitespace after closing encapsulator - * a, " foo " ,b - */ - String code = - "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b"; - TestCSVParser parser = new TestCSVParser(new StringReader(code)); - assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken()); - assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + "; foo;", parser.testNextToken()); - assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";foo ;", parser.testNextToken()); - assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + "; foo ;", parser.testNextToken()); + // encapsulator tokenizer (sinle line) + public void testNextToken4() throws IOException { + /* file: a,"foo",b + * a, " foo",b + * a,"foo " ,b // whitespace after closing encapsulator + * a, " foo " ,b + */ + String code = + "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b"; + TestCSVParser parser = new TestCSVParser(new StringReader(code)); + assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken()); + assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + "; foo;", parser.testNextToken()); + assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";foo ;", parser.testNextToken()); + assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + "; foo ;", parser.testNextToken()); // assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); - assertEquals(CSVParser.TT_EOF + ";b;", parser.testNextToken()); - } - - // encapsulator tokenizer (multi line, delimiter in string) - public void testNextToken5() throws IOException { - String code = - "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\""; - TestCSVParser parser = new TestCSVParser(new StringReader(code)); - assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken()); - assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); - assertEquals(CSVParser.TT_EORECORD + ";foo\n baar ,,,;", - parser.testNextToken()); - assertEquals(CSVParser.TT_EOF + ";\n\t \n;", parser.testNextToken()); - - } - - // change delimiters, comment, encapsulater - public void testNextToken6() throws IOException { - /* file: a;'b and \' more - * ' - * !comment;;;; - * ;; - */ - String code = "a;'b and '' more\n'\n!comment;;;;\n;;"; - TestCSVParser parser = new TestCSVParser(new StringReader(code), new CSVStrategy(';', '\'', '!')); - assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); - assertEquals( - CSVParser.TT_EORECORD + ";b and ' more\n;", - parser.testNextToken()); - } - - - // ====================================================== - // parser tests - // ====================================================== - - String code = - "a,b,c,d\n" - + " a , b , 1 2 \n" - + "\"foo baar\", b,\n" - // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n"; - + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping - String[][] res = { - {"a", "b", "c", "d"}, - {"a", "b", "1 2"}, - {"foo baar", "b", ""}, - {"foo\n,,\n\",,\n\"", "d", "e"} - }; - public void testGetLine() throws IOException { - CSVParser parser = new CSVParser(new StringReader(code)); - String[] tmp = null; - for (int i = 0; i < res.length; i++) { - tmp = parser.getLine(); - assertTrue(Arrays.equals(res[i], tmp)); + assertEquals(CSVParser.TT_EOF + ";b;", parser.testNextToken()); } - tmp = parser.getLine(); - assertTrue(tmp == null); - } - - public void testNextValue() throws IOException { - CSVParser parser = new CSVParser(new StringReader(code)); - String tmp = null; - for (int i = 0; i < res.length; i++) { - for (int j = 0; j < res[i].length; j++) { + + // encapsulator tokenizer (multi line, delimiter in string) + public void testNextToken5() throws IOException { + String code = + "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\""; + TestCSVParser parser = new TestCSVParser(new StringReader(code)); + assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken()); + assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken()); + assertEquals(CSVParser.TT_EORECORD + ";foo\n baar ,,,;", + parser.testNextToken()); + assertEquals(CSVParser.TT_EOF + ";\n\t \n;", parser.testNextToken()); + + } + + // change delimiters, comment, encapsulater + public void testNextToken6() throws IOException { + /* file: a;'b and \' more + * ' + * !comment;;;; + * ;; + */ + String code = "a;'b and '' more\n'\n!comment;;;;\n;;"; + TestCSVParser parser = new TestCSVParser(new StringReader(code), new CSVStrategy(';', '\'', '!')); + assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); + assertEquals( + CSVParser.TT_EORECORD + ";b and ' more\n;", + parser.testNextToken()); + } + + + // ====================================================== + // parser tests + // ====================================================== + + String code = + "a,b,c,d\n" + + " a , b , 1 2 \n" + + "\"foo baar\", b,\n" + // + " \"foo\n,,\n\"\",,\n\\\"\",d,e\n"; + + " \"foo\n,,\n\"\",,\n\"\"\",d,e\n"; // changed to use standard CSV escaping + String[][] res = { + {"a", "b", "c", "d"}, + {"a", "b", "1 2"}, + {"foo baar", "b", ""}, + {"foo\n,,\n\",,\n\"", "d", "e"} + }; + + public void testGetLine() throws IOException { + CSVParser parser = new CSVParser(new StringReader(code)); + String[] tmp = null; + for (int i = 0; i < res.length; i++) { + tmp = parser.getLine(); + assertTrue(Arrays.equals(res[i], tmp)); + } + tmp = parser.getLine(); + assertTrue(tmp == null); + } + + public void testNextValue() throws IOException { + CSVParser parser = new CSVParser(new StringReader(code)); + String tmp = null; + for (int i = 0; i < res.length; i++) { + for (int j = 0; j < res[i].length; j++) { + tmp = parser.nextValue(); + assertEquals(res[i][j], tmp); + } + } tmp = parser.nextValue(); - assertEquals(res[i][j], tmp); - } + assertTrue(tmp == null); } - tmp = parser.nextValue(); - assertTrue(tmp == null); - } - - public void testGetAllValues() throws IOException { - CSVParser parser = new CSVParser(new StringReader(code)); - String[][] tmp = parser.getAllValues(); - assertEquals(res.length, tmp.length); - assertTrue(tmp.length > 0); - for (int i = 0; i < res.length; i++) { - assertTrue(Arrays.equals(res[i], tmp[i])); + + public void testGetAllValues() throws IOException { + CSVParser parser = new CSVParser(new StringReader(code)); + String[][] tmp = parser.getAllValues(); + assertEquals(res.length, tmp.length); + assertTrue(tmp.length > 0); + for (int i = 0; i < res.length; i++) { + assertTrue(Arrays.equals(res[i], tmp[i])); + } } - } - - public void testExcelStrategy1() throws IOException { - String code = - "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," - + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n"; - String[][] res = { - {"value1", "value2", "value3", "value4"}, - {"a", "b", "c", "d"}, - {" x", "", "", ""}, - {""}, - {"\"hello\"", " \"world\"", "abc\ndef", ""} - }; - CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY); - String[][] tmp = parser.getAllValues(); - assertEquals(res.length, tmp.length); - assertTrue(tmp.length > 0); - for (int i = 0; i < res.length; i++) { - assertTrue(Arrays.equals(res[i], tmp[i])); - } - } - - public void testExcelStrategy2() throws Exception { - String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n"; - String[][] res = { - {"foo", "baar"}, - {""}, - {"hello", ""}, - {""}, - {"world", ""} - }; - CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY); - String[][] tmp = parser.getAllValues(); - assertEquals(res.length, tmp.length); - assertTrue(tmp.length > 0); - for (int i = 0; i < res.length; i++) { - assertTrue(Arrays.equals(res[i], tmp[i])); - } - } - - public void testEndOfFileBehaviourExcel() throws Exception { - String[] codes = { - "hello,\r\n\r\nworld,\r\n", - "hello,\r\n\r\nworld,", - "hello,\r\n\r\nworld,\"\"\r\n", - "hello,\r\n\r\nworld,\"\"", - "hello,\r\n\r\nworld,\n", - "hello,\r\n\r\nworld,", - "hello,\r\n\r\nworld,\"\"\n", - "hello,\r\n\r\nworld,\"\"" + + public void testExcelStrategy1() throws IOException { + String code = + "value1,value2,value3,value4\r\na,b,c,d\r\n x,,," + + "\r\n\r\n\"\"\"hello\"\"\",\" \"\"world\"\"\",\"abc\ndef\",\r\n"; + String[][] res = { + {"value1", "value2", "value3", "value4"}, + {"a", "b", "c", "d"}, + {" x", "", "", ""}, + {""}, + {"\"hello\"", " \"world\"", "abc\ndef", ""} }; - String[][] res = { - {"hello", ""}, - {""}, // ExcelStrategy does not ignore empty lines - {"world", ""} - }; - String code; - for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { - code = codes[codeIndex]; - CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY); - String[][] tmp = parser.getAllValues(); - assertEquals(res.length, tmp.length); - assertTrue(tmp.length > 0); - for (int i = 0; i < res.length; i++) { - assertTrue(Arrays.equals(res[i], tmp[i])); - } + CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY); + String[][] tmp = parser.getAllValues(); + assertEquals(res.length, tmp.length); + assertTrue(tmp.length > 0); + for (int i = 0; i < res.length; i++) { + assertTrue(Arrays.equals(res[i], tmp[i])); + } } - } - - public void testEndOfFileBehaviorCSV() throws Exception { - String[] codes = { - "hello,\r\n\r\nworld,\r\n", - "hello,\r\n\r\nworld,", - "hello,\r\n\r\nworld,\"\"\r\n", - "hello,\r\n\r\nworld,\"\"", - "hello,\r\n\r\nworld,\n", - "hello,\r\n\r\nworld,", - "hello,\r\n\r\nworld,\"\"\n", - "hello,\r\n\r\nworld,\"\"" + + public void testExcelStrategy2() throws Exception { + String code = "foo,baar\r\n\r\nhello,\r\n\r\nworld,\r\n"; + String[][] res = { + {"foo", "baar"}, + {""}, + {"hello", ""}, + {""}, + {"world", ""} }; - String[][] res = { - {"hello", ""}, // CSV Strategy ignores empty lines - {"world", ""} - }; - String code; - for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { - code = codes[codeIndex]; - CSVParser parser = new CSVParser(new StringReader(code)); - String[][] tmp = parser.getAllValues(); - assertEquals(res.length, tmp.length); - assertTrue(tmp.length > 0); - for (int i = 0; i < res.length; i++) { - assertTrue(Arrays.equals(res[i], tmp[i])); - } + CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY); + String[][] tmp = parser.getAllValues(); + assertEquals(res.length, tmp.length); + assertTrue(tmp.length > 0); + for (int i = 0; i < res.length; i++) { + assertTrue(Arrays.equals(res[i], tmp[i])); + } } - } - - public void testEmptyLineBehaviourExcel() throws Exception { - String[] codes = { - "hello,\r\n\r\n\r\n", - "hello,\n\n\n", - "hello,\"\"\r\n\r\n\r\n", - "hello,\"\"\n\n\n" + + public void testEndOfFileBehaviourExcel() throws Exception { + String[] codes = { + "hello,\r\n\r\nworld,\r\n", + "hello,\r\n\r\nworld,", + "hello,\r\n\r\nworld,\"\"\r\n", + "hello,\r\n\r\nworld,\"\"", + "hello,\r\n\r\nworld,\n", + "hello,\r\n\r\nworld,", + "hello,\r\n\r\nworld,\"\"\n", + "hello,\r\n\r\nworld,\"\"" }; - String[][] res = { - {"hello", ""}, - {""}, // ExcelStrategy does not ignore empty lines - {""} - }; - String code; - for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { - code = codes[codeIndex]; - CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY); - String[][] tmp = parser.getAllValues(); - assertEquals(res.length, tmp.length); - assertTrue(tmp.length > 0); - for (int i = 0; i < res.length; i++) { - assertTrue(Arrays.equals(res[i], tmp[i])); - } - } - } - - public void testEmptyLineBehaviourCSV() throws Exception { - String[] codes = { - "hello,\r\n\r\n\r\n", - "hello,\n\n\n", - "hello,\"\"\r\n\r\n\r\n", - "hello,\"\"\n\n\n" + String[][] res = { + {"hello", ""}, + {""}, // ExcelStrategy does not ignore empty lines + {"world", ""} }; - String[][] res = { - {"hello", ""} // CSV Strategy ignores empty lines - }; - String code; - for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { - code = codes[codeIndex]; - CSVParser parser = new CSVParser(new StringReader(code)); - String[][] tmp = parser.getAllValues(); - assertEquals(res.length, tmp.length); - assertTrue(tmp.length > 0); - for (int i = 0; i < res.length; i++) { - assertTrue(Arrays.equals(res[i], tmp[i])); - } - } - } - - public void OLDtestBackslashEscaping() throws IOException { - String code = - "one,two,three\n" - + "on\\\"e,two\n" - + "on\"e,two\n" - + "one,\"tw\\\"o\"\n" - + "one,\"t\\,wo\"\n" - + "one,two,\"th,ree\"\n" - + "\"a\\\\\"\n" - + "a\\,b\n" - + "\"a\\\\,b\""; - String[][] res = { - { "one", "two", "three" }, - { "on\\\"e", "two" }, - { "on\"e", "two" }, - { "one", "tw\"o" }, - { "one", "t\\,wo" }, // backslash in quotes only escapes a delimiter (",") - { "one", "two", "th,ree" }, - { "a\\\\" }, // backslash in quotes only escapes a delimiter (",") - { "a\\", "b" }, // a backslash must be returnd - { "a\\\\,b" } // backslash in quotes only escapes a delimiter (",") - }; - CSVParser parser = new CSVParser(new StringReader(code)); - String[][] tmp = parser.getAllValues(); - assertEquals(res.length, tmp.length); - assertTrue(tmp.length > 0); - for (int i = 0; i < res.length; i++) { - assertTrue(Arrays.equals(res[i], tmp[i])); - } - } - - public void testBackslashEscaping() throws IOException { - - // To avoid confusion over the need for escaping chars in java code, - // We will test with a forward slash as the escape char, and a single - // quote as the encapsulator. - - String code = - "one,two,three\n" // 0 - + "'',''\n" // 1) empty encapsulators - + "/',/'\n" // 2) single encapsulators - + "'/'','/''\n" // 3) single encapsulators encapsulated via escape - + "'''',''''\n" // 4) single encapsulators encapsulated via doubling - + "/,,/,\n" // 5) separator escaped - + "//,//\n" // 6) escape escaped - + "'//','//'\n" // 7) escape escaped in encapsulation - + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces - + "9, /\n \n" // escaped newline - + ""; - String[][] res = { - { "one", "two", "three" }, // 0 - { "", "" }, // 1 - { "'", "'" }, // 2 - { "'", "'" }, // 3 - { "'", "'" }, // 4 - { ",", "," }, // 5 - { "/", "/" }, // 6 - { "/", "/" }, // 7 - { " 8 ", " \"quoted \"\" \" / string\" " }, - { "9", " \n " }, - }; - - - CSVStrategy strategy = new CSVStrategy(',','\'',CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true); - - CSVParser parser = new CSVParser(new StringReader(code), strategy); - String[][] tmp = parser.getAllValues(); - assertTrue(tmp.length > 0); - for (int i = 0; i < res.length; i++) { - assertTrue(Arrays.equals(res[i], tmp[i])); - } - } - - public void testBackslashEscaping2() throws IOException { - - // To avoid confusion over the need for escaping chars in java code, - // We will test with a forward slash as the escape char, and a single - // quote as the encapsulator. - - String code = "" - + " , , \n" // 1) - + " \t , , \n" // 2) - + " // , /, , /,\n" // 3) - + ""; - String[][] res = { - { " ", " ", " " }, // 1 - { " \t ", " ", " " }, // 2 - { " / ", " , ", " ," }, //3 - }; - - - CSVStrategy strategy = new CSVStrategy(',',CSVStrategy.ENCAPSULATOR_DISABLED,CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true); - - CSVParser parser = new CSVParser(new StringReader(code), strategy); - String[][] tmp = parser.getAllValues(); - assertTrue(tmp.length > 0); - - if (!CSVPrinterTest.equals(res, tmp)) { - assertTrue(false); + String code; + for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { + code = codes[codeIndex]; + CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY); + String[][] tmp = parser.getAllValues(); + assertEquals(res.length, tmp.length); + assertTrue(tmp.length > 0); + for (int i = 0; i < res.length; i++) { + assertTrue(Arrays.equals(res[i], tmp[i])); + } + } } - } - - - public void testDefaultStrategy() throws IOException { - - String code = "" - + "a,b\n" // 1) - + "\"\n\",\" \"\n" // 2) - + "\"\",#\n" // 2) - ; - String[][] res = { - { "a", "b" }, - { "\n", " " }, - { "", "#" }, - }; - - CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY; - assertEquals(CSVStrategy.COMMENTS_DISABLED, strategy.getCommentStart()); - - CSVParser parser = new CSVParser(new StringReader(code), strategy); - String[][] tmp = parser.getAllValues(); - assertTrue(tmp.length > 0); - - if (!CSVPrinterTest.equals(res, tmp)) { - assertTrue(false); + public void testEndOfFileBehaviorCSV() throws Exception { + String[] codes = { + "hello,\r\n\r\nworld,\r\n", + "hello,\r\n\r\nworld,", + "hello,\r\n\r\nworld,\"\"\r\n", + "hello,\r\n\r\nworld,\"\"", + "hello,\r\n\r\nworld,\n", + "hello,\r\n\r\nworld,", + "hello,\r\n\r\nworld,\"\"\n", + "hello,\r\n\r\nworld,\"\"" + }; + String[][] res = { + {"hello", ""}, // CSV Strategy ignores empty lines + {"world", ""} + }; + String code; + for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { + code = codes[codeIndex]; + CSVParser parser = new CSVParser(new StringReader(code)); + String[][] tmp = parser.getAllValues(); + assertEquals(res.length, tmp.length); + assertTrue(tmp.length > 0); + for (int i = 0; i < res.length; i++) { + assertTrue(Arrays.equals(res[i], tmp[i])); + } + } } - String[][] res_comments = { - { "a", "b" }, - { "\n", " " }, - { ""}, - }; - - strategy = new CSVStrategy(',','"','#'); - parser = new CSVParser(new StringReader(code), strategy); - tmp = parser.getAllValues(); - - if (!CSVPrinterTest.equals(res_comments, tmp)) { - assertTrue(false); + public void testEmptyLineBehaviourExcel() throws Exception { + String[] codes = { + "hello,\r\n\r\n\r\n", + "hello,\n\n\n", + "hello,\"\"\r\n\r\n\r\n", + "hello,\"\"\n\n\n" + }; + String[][] res = { + {"hello", ""}, + {""}, // ExcelStrategy does not ignore empty lines + {""} + }; + String code; + for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { + code = codes[codeIndex]; + CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY); + String[][] tmp = parser.getAllValues(); + assertEquals(res.length, tmp.length); + assertTrue(tmp.length > 0); + for (int i = 0; i < res.length; i++) { + assertTrue(Arrays.equals(res[i], tmp[i])); + } + } + } + + public void testEmptyLineBehaviourCSV() throws Exception { + String[] codes = { + "hello,\r\n\r\n\r\n", + "hello,\n\n\n", + "hello,\"\"\r\n\r\n\r\n", + "hello,\"\"\n\n\n" + }; + String[][] res = { + {"hello", ""} // CSV Strategy ignores empty lines + }; + String code; + for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { + code = codes[codeIndex]; + CSVParser parser = new CSVParser(new StringReader(code)); + String[][] tmp = parser.getAllValues(); + assertEquals(res.length, tmp.length); + assertTrue(tmp.length > 0); + for (int i = 0; i < res.length; i++) { + assertTrue(Arrays.equals(res[i], tmp[i])); + } + } + } + + public void OLDtestBackslashEscaping() throws IOException { + String code = + "one,two,three\n" + + "on\\\"e,two\n" + + "on\"e,two\n" + + "one,\"tw\\\"o\"\n" + + "one,\"t\\,wo\"\n" + + "one,two,\"th,ree\"\n" + + "\"a\\\\\"\n" + + "a\\,b\n" + + "\"a\\\\,b\""; + String[][] res = { + {"one", "two", "three"}, + {"on\\\"e", "two"}, + {"on\"e", "two"}, + {"one", "tw\"o"}, + {"one", "t\\,wo"}, // backslash in quotes only escapes a delimiter (",") + {"one", "two", "th,ree"}, + {"a\\\\"}, // backslash in quotes only escapes a delimiter (",") + {"a\\", "b"}, // a backslash must be returnd + {"a\\\\,b"} // backslash in quotes only escapes a delimiter (",") + }; + CSVParser parser = new CSVParser(new StringReader(code)); + String[][] tmp = parser.getAllValues(); + assertEquals(res.length, tmp.length); + assertTrue(tmp.length > 0); + for (int i = 0; i < res.length; i++) { + assertTrue(Arrays.equals(res[i], tmp[i])); + } + } + + public void testBackslashEscaping() throws IOException { + + // To avoid confusion over the need for escaping chars in java code, + // We will test with a forward slash as the escape char, and a single + // quote as the encapsulator. + + String code = + "one,two,three\n" // 0 + + "'',''\n" // 1) empty encapsulators + + "/',/'\n" // 2) single encapsulators + + "'/'','/''\n" // 3) single encapsulators encapsulated via escape + + "'''',''''\n" // 4) single encapsulators encapsulated via doubling + + "/,,/,\n" // 5) separator escaped + + "//,//\n" // 6) escape escaped + + "'//','//'\n" // 7) escape escaped in encapsulation + + " 8 , \"quoted \"\" /\" // string\" \n" // don't eat spaces + + "9, /\n \n" // escaped newline + + ""; + String[][] res = { + {"one", "two", "three"}, // 0 + {"", ""}, // 1 + {"'", "'"}, // 2 + {"'", "'"}, // 3 + {"'", "'"}, // 4 + {",", ","}, // 5 + {"/", "/"}, // 6 + {"/", "/"}, // 7 + {" 8 ", " \"quoted \"\" \" / string\" "}, + {"9", " \n "}, + }; + + + CSVStrategy strategy = new CSVStrategy(',', '\'', CSVStrategy.COMMENTS_DISABLED, '/', false, false, true, true); + + CSVParser parser = new CSVParser(new StringReader(code), strategy); + String[][] tmp = parser.getAllValues(); + assertTrue(tmp.length > 0); + for (int i = 0; i < res.length; i++) { + assertTrue(Arrays.equals(res[i], tmp[i])); + } + } + + public void testBackslashEscaping2() throws IOException { + + // To avoid confusion over the need for escaping chars in java code, + // We will test with a forward slash as the escape char, and a single + // quote as the encapsulator. + + String code = "" + + " , , \n" // 1) + + " \t , , \n" // 2) + + " // , /, , /,\n" // 3) + + ""; + String[][] res = { + {" ", " ", " "}, // 1 + {" \t ", " ", " "}, // 2 + {" / ", " , ", " ,"}, //3 + }; + + + CSVStrategy strategy = new CSVStrategy(',', CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED, '/', false, false, true, true); + + CSVParser parser = new CSVParser(new StringReader(code), strategy); + String[][] tmp = parser.getAllValues(); + assertTrue(tmp.length > 0); + + if (!CSVPrinterTest.equals(res, tmp)) { + assertTrue(false); + } + + } + + + public void testDefaultStrategy() throws IOException { + + String code = "" + + "a,b\n" // 1) + + "\"\n\",\" \"\n" // 2) + + "\"\",#\n" // 2) + ; + String[][] res = { + {"a", "b"}, + {"\n", " "}, + {"", "#"}, + }; + + CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY; + assertEquals(CSVStrategy.COMMENTS_DISABLED, strategy.getCommentStart()); + + CSVParser parser = new CSVParser(new StringReader(code), strategy); + String[][] tmp = parser.getAllValues(); + assertTrue(tmp.length > 0); + + if (!CSVPrinterTest.equals(res, tmp)) { + assertTrue(false); + } + + String[][] res_comments = { + {"a", "b"}, + {"\n", " "}, + {""}, + }; + + strategy = new CSVStrategy(',', '"', '#'); + parser = new CSVParser(new StringReader(code), strategy); + tmp = parser.getAllValues(); + + if (!CSVPrinterTest.equals(res_comments, tmp)) { + assertTrue(false); + } } - } public void testUnicodeEscape() throws IOException { - String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063"; - CSVParser parser = new CSVParser(new StringReader(code)); - parser.getStrategy().setUnicodeEscapeInterpretation(true); - String[] data = parser.getLine(); - assertEquals(2, data.length); - assertEquals("abc", data[0]); - assertEquals("public", data[1]); + String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063"; + CSVParser parser = new CSVParser(new StringReader(code)); + parser.getStrategy().setUnicodeEscapeInterpretation(true); + String[] data = parser.getLine(); + assertEquals(2, data.length); + assertEquals("abc", data[0]); + assertEquals("public", data[1]); } - + public void testCarriageReturnLineFeedEndings() throws IOException { - String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; - CSVParser parser = new CSVParser(new StringReader(code)); - String[][] data = parser.getAllValues(); - assertEquals(4, data.length); + String code = "foo\r\nbaar,\r\nhello,world\r\n,kanu"; + CSVParser parser = new CSVParser(new StringReader(code)); + String[][] data = parser.getAllValues(); + assertEquals(4, data.length); } public void testCarriageReturnEndings() throws IOException { - String code = "foo\rbaar,\rhello,world\r,kanu"; - CSVParser parser = new CSVParser(new StringReader(code)); - String[][] data = parser.getAllValues(); - assertEquals(4, data.length); + String code = "foo\rbaar,\rhello,world\r,kanu"; + CSVParser parser = new CSVParser(new StringReader(code)); + String[][] data = parser.getAllValues(); + assertEquals(4, data.length); } public void testLineFeedEndings() throws IOException { - String code = "foo\nbaar,\nhello,world\n,kanu"; - CSVParser parser = new CSVParser(new StringReader(code)); - String[][] data = parser.getAllValues(); - assertEquals(4, data.length); + String code = "foo\nbaar,\nhello,world\n,kanu"; + CSVParser parser = new CSVParser(new StringReader(code)); + String[][] data = parser.getAllValues(); + assertEquals(4, data.length); } public void testIgnoreEmptyLines() throws IOException { - String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; - //String code = "world\r\n\n"; - //String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n"; - CSVParser parser = new CSVParser(new StringReader(code)); - String[][] data = parser.getAllValues(); - assertEquals(3, data.length); + String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; + //String code = "world\r\n\n"; + //String code = "foo;baar\r\n\r\nhello;\r\n\r\nworld;\r\n"; + CSVParser parser = new CSVParser(new StringReader(code)); + String[][] data = parser.getAllValues(); + assertEquals(3, data.length); } - + public void testLineTokenConsistency() throws IOException { - String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; - CSVParser parser = new CSVParser(new StringReader(code)); - String[][] data = parser.getAllValues(); - parser = new CSVParser(new StringReader(code)); - CSVParser parser1 = new CSVParser(new StringReader(code)); - for (int i = 0; i < data.length; i++) { - assertTrue(Arrays.equals(parser1.getLine(), data[i])); - for (int j = 0; j < data[i].length; j++) { - assertEquals(parser.nextValue(), data[i][j]); + String code = "\nfoo,baar\n\r\n,\n\n,world\r\n\n"; + CSVParser parser = new CSVParser(new StringReader(code)); + String[][] data = parser.getAllValues(); + parser = new CSVParser(new StringReader(code)); + CSVParser parser1 = new CSVParser(new StringReader(code)); + for (int i = 0; i < data.length; i++) { + assertTrue(Arrays.equals(parser1.getLine(), data[i])); + for (int j = 0; j < data[i].length; j++) { + assertEquals(parser.nextValue(), data[i][j]); + } } - } } // From SANDBOX-153 - public void testDelimiterIsWhitespace() throws IOException { - String code = "one\ttwo\t\tfour \t five\t six"; - TestCSVParser parser = new TestCSVParser(new StringReader(code), CSVStrategy.TDF_STRATEGY); - assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";four;", parser.testNextToken()); - assertEquals(CSVParser.TT_TOKEN + ";five;", parser.testNextToken()); - assertEquals(CSVParser.TT_EOF + ";six;", parser.testNextToken()); - } + public void testDelimiterIsWhitespace() throws IOException { + String code = "one\ttwo\t\tfour \t five\t six"; + TestCSVParser parser = new TestCSVParser(new StringReader(code), CSVStrategy.TDF_STRATEGY); + assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";four;", parser.testNextToken()); + assertEquals(CSVParser.TT_TOKEN + ";five;", parser.testNextToken()); + assertEquals(CSVParser.TT_EOF + ";six;", parser.testNextToken()); + } } diff --git a/src/test/org/apache/commons/csv/CSVPrinterTest.java b/src/test/org/apache/commons/csv/CSVPrinterTest.java index 2b15aced..cc4ec9fd 100644 --- a/src/test/org/apache/commons/csv/CSVPrinterTest.java +++ b/src/test/org/apache/commons/csv/CSVPrinterTest.java @@ -30,200 +30,219 @@ import junit.framework.TestSuite; * CSVPrinterTest */ public class CSVPrinterTest extends TestCase { - - String lineSeparator = "\n"; - public void testPrinter1() throws IOException { - StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY); - String[] line1 = {"a", "b"}; - printer.println(line1); - assertEquals("a,b" + lineSeparator, sw.toString()); - } + String lineSeparator = "\n"; - public void testPrinter2() throws IOException { - StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY); - String[] line1 = {"a,b", "b"}; - printer.println(line1); - assertEquals("\"a,b\",b" + lineSeparator, sw.toString()); - } - - public void testPrinter3() throws IOException { - StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY); - String[] line1 = {"a, b", "b "}; - printer.println(line1); - assertEquals("\"a, b\",\"b \"" + lineSeparator, sw.toString()); - } - - public void testPrinter4() throws IOException { - StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY); - String[] line1 = {"a", "b\"c"}; - printer.println(line1); - assertEquals("a,\"b\"\"c\"" + lineSeparator, sw.toString()); - } - - public void testPrinter5() throws IOException { - StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY); - String[] line1 = {"a", "b\nc"}; - printer.println(line1); - assertEquals("a,\"b\nc\"" + lineSeparator, sw.toString()); - } - - public void testPrinter6() throws IOException { - StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY); - String[] line1 = {"a", "b\r\nc"}; - printer.println(line1); - assertEquals("a,\"b\r\nc\"" + lineSeparator, sw.toString()); - } - - public void testPrinter7() throws IOException { - StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY); - String[] line1 = {"a", "b\\c"}; - printer.println(line1); - assertEquals("a,b\\c" + lineSeparator, sw.toString()); - } - - public void testExcelPrinter1() throws IOException { - StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.EXCEL_STRATEGY); - String[] line1 = {"a", "b"}; - printer.println(line1); - assertEquals("a,b" + lineSeparator, sw.toString()); - } - - public void testExcelPrinter2() throws IOException { - StringWriter sw = new StringWriter(); - CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.EXCEL_STRATEGY); - String[] line1 = {"a,b", "b"}; - printer.println(line1); - assertEquals("\"a,b\",b" + lineSeparator, sw.toString()); - } - - - - public void testRandom() throws Exception { - int iter=10000; - strategy = CSVStrategy.DEFAULT_STRATEGY; - doRandom(iter); - strategy = CSVStrategy.EXCEL_STRATEGY; - doRandom(iter); - - // Strategy for MySQL - strategy = new CSVStrategy('\t', CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED,'\\',false, false, false, false); - doRandom(iter); - } - - Random r = new Random(); - CSVStrategy strategy; - - public void doRandom(int iter) throws Exception { - for (int i=0; i=128) { - sb.append("(" + (int)ch + ")"); - } else { - sb.append(ch); - } - } - return sb.toString(); - } + public void doOneRandom() throws Exception { + int nLines = r.nextInt(4) + 1; + int nCol = r.nextInt(3) + 1; + // nLines=1;nCol=2; + String[][] lines = new String[nLines][]; + for (int i = 0; i < nLines; i++) { + String[] line = new String[nCol]; + lines[i] = line; + for (int j = 0; j < nCol; j++) { + line[j] = randStr(); + } + } - public String randStr() { - int sz = r.nextInt(20); - // sz = r.nextInt(3); - char[] buf = new char[sz]; - for (int i=0; i= 128) { + sb.append("(" + (int) ch + ")"); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + + public String randStr() { + int sz = r.nextInt(20); + // sz = r.nextInt(3); + char[] buf = new char[sz]; + for (int i = 0; i < sz; i++) { + // stick in special chars with greater frequency + char ch; + int what = r.nextInt(20); + switch (what) { + case 0: + ch = '\r'; + break; + case 1: + ch = '\n'; + break; + case 2: + ch = '\t'; + break; + case 3: + ch = '\f'; + break; + case 4: + ch = ' '; + break; + case 5: + ch = ','; + break; + case 6: + ch = '"'; + break; + case 7: + ch = '\''; + break; + case 8: + ch = '\\'; + break; + default: + ch = (char) r.nextInt(300); + break; + // default: ch = 'a'; break; + } + buf[i] = ch; + } + return new String(buf); } - return new String(buf); - } } diff --git a/src/test/org/apache/commons/csv/CSVStrategyTest.java b/src/test/org/apache/commons/csv/CSVStrategyTest.java index 137bf883..489928c8 100644 --- a/src/test/org/apache/commons/csv/CSVStrategyTest.java +++ b/src/test/org/apache/commons/csv/CSVStrategyTest.java @@ -24,68 +24,68 @@ import junit.framework.TestCase; * CSVStrategyTest * * The test are organized in three different sections: - * The 'setter/getter' section, the lexer section and finally the strategy - * section. In case a test fails, you should follow a top-down approach for + * The 'setter/getter' section, the lexer section and finally the strategy + * section. In case a test fails, you should follow a top-down approach for * fixing a potential bug (its likely that the strategy itself fails if the lexer * has problems...). */ public class CSVStrategyTest extends TestCase { - // ====================================================== - // getters / setters - // ====================================================== - public void testGetSetCommentStart() { - CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone(); - strategy.setCommentStart('#'); - assertEquals(strategy.getCommentStart(), '#'); - strategy.setCommentStart('!'); - assertEquals(strategy.getCommentStart(), '!'); - } + // ====================================================== + // getters / setters + // ====================================================== + public void testGetSetCommentStart() { + CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone(); + strategy.setCommentStart('#'); + assertEquals(strategy.getCommentStart(), '#'); + strategy.setCommentStart('!'); + assertEquals(strategy.getCommentStart(), '!'); + } - public void testGetSetEncapsulator() { - CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone(); - strategy.setEncapsulator('"'); - assertEquals(strategy.getEncapsulator(), '"'); - strategy.setEncapsulator('\''); - assertEquals(strategy.getEncapsulator(), '\''); - } + public void testGetSetEncapsulator() { + CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone(); + strategy.setEncapsulator('"'); + assertEquals(strategy.getEncapsulator(), '"'); + strategy.setEncapsulator('\''); + assertEquals(strategy.getEncapsulator(), '\''); + } - public void testGetSetDelimiter() { - CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone(); - strategy.setDelimiter(';'); - assertEquals(strategy.getDelimiter(), ';'); - strategy.setDelimiter(','); - assertEquals(strategy.getDelimiter(), ','); - strategy.setDelimiter('\t'); - assertEquals(strategy.getDelimiter(), '\t'); - } + public void testGetSetDelimiter() { + CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone(); + strategy.setDelimiter(';'); + assertEquals(strategy.getDelimiter(), ';'); + strategy.setDelimiter(','); + assertEquals(strategy.getDelimiter(), ','); + strategy.setDelimiter('\t'); + assertEquals(strategy.getDelimiter(), '\t'); + } + + public void testSetCSVStrategy() { + CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY; + // default settings + assertEquals(strategy.getDelimiter(), ','); + assertEquals(strategy.getEncapsulator(), '"'); + assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED); + assertEquals(true, strategy.getIgnoreLeadingWhitespaces()); + assertEquals(false, strategy.getUnicodeEscapeInterpretation()); + assertEquals(true, strategy.getIgnoreEmptyLines()); + // explicit csv settings + assertEquals(strategy.getDelimiter(), ','); + assertEquals(strategy.getEncapsulator(), '"'); + assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED); + assertEquals(true, strategy.getIgnoreLeadingWhitespaces()); + assertEquals(false, strategy.getUnicodeEscapeInterpretation()); + assertEquals(true, strategy.getIgnoreEmptyLines()); + } + + public void testSetExcelStrategy() { + CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY; + assertEquals(strategy.getDelimiter(), ','); + assertEquals(strategy.getEncapsulator(), '"'); + assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED); + assertEquals(false, strategy.getIgnoreLeadingWhitespaces()); + assertEquals(false, strategy.getUnicodeEscapeInterpretation()); + assertEquals(false, strategy.getIgnoreEmptyLines()); + } - public void testSetCSVStrategy() { - CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY; - // default settings - assertEquals(strategy.getDelimiter(), ','); - assertEquals(strategy.getEncapsulator(), '"'); - assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED); - assertEquals(true, strategy.getIgnoreLeadingWhitespaces()); - assertEquals(false, strategy.getUnicodeEscapeInterpretation()); - assertEquals(true, strategy.getIgnoreEmptyLines()); - // explicit csv settings - assertEquals(strategy.getDelimiter(), ','); - assertEquals(strategy.getEncapsulator(), '"'); - assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED); - assertEquals(true, strategy.getIgnoreLeadingWhitespaces()); - assertEquals(false, strategy.getUnicodeEscapeInterpretation()); - assertEquals(true, strategy.getIgnoreEmptyLines()); - } - - public void testSetExcelStrategy() { - CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY; - assertEquals(strategy.getDelimiter(), ','); - assertEquals(strategy.getEncapsulator(), '"'); - assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED); - assertEquals(false, strategy.getIgnoreLeadingWhitespaces()); - assertEquals(false, strategy.getUnicodeEscapeInterpretation()); - assertEquals(false, strategy.getIgnoreEmptyLines()); - } - } diff --git a/src/test/org/apache/commons/csv/CSVUtilsTest.java b/src/test/org/apache/commons/csv/CSVUtilsTest.java index c32cefab..94c5a54f 100644 --- a/src/test/org/apache/commons/csv/CSVUtilsTest.java +++ b/src/test/org/apache/commons/csv/CSVUtilsTest.java @@ -24,127 +24,127 @@ import junit.framework.TestCase; * CSVUtilsTest */ public class CSVUtilsTest extends TestCase { - - // ====================================================== - // static parser tests - // ====================================================== - public void testParse1() throws IOException { - String[][] data = CSVUtils.parse("abc\ndef"); - assertEquals(2, data.length); - assertEquals(1, data[0].length); - assertEquals(1, data[1].length); - assertEquals("abc", data[0][0]); - assertEquals("def", data[1][0]); + + // ====================================================== + // static parser tests + // ====================================================== + public void testParse1() throws IOException { + String[][] data = CSVUtils.parse("abc\ndef"); + assertEquals(2, data.length); + assertEquals(1, data[0].length); + assertEquals(1, data[1].length); + assertEquals("abc", data[0][0]); + assertEquals("def", data[1][0]); } public void testParse2() throws IOException { - String[][] data = CSVUtils.parse("abc,def,\"ghi,jkl\"\ndef"); - assertEquals(2, data.length); - assertEquals(3, data[0].length); - assertEquals(1, data[1].length); - assertEquals("abc", data[0][0]); - assertEquals("def", data[0][1]); - assertEquals("ghi,jkl", data[0][2]); - assertEquals("def", data[1][0]); + String[][] data = CSVUtils.parse("abc,def,\"ghi,jkl\"\ndef"); + assertEquals(2, data.length); + assertEquals(3, data[0].length); + assertEquals(1, data[1].length); + assertEquals("abc", data[0][0]); + assertEquals("def", data[0][1]); + assertEquals("ghi,jkl", data[0][2]); + assertEquals("def", data[1][0]); } public void testParse3() throws IOException { - String[][] data = CSVUtils.parse("abc,\"def\nghi\"\njkl"); - assertEquals(2, data.length); - assertEquals(2, data[0].length); - assertEquals(1, data[1].length); - assertEquals("abc", data[0][0]); - assertEquals("def\nghi", data[0][1]); - assertEquals("jkl", data[1][0]); + String[][] data = CSVUtils.parse("abc,\"def\nghi\"\njkl"); + assertEquals(2, data.length); + assertEquals(2, data[0].length); + assertEquals(1, data[1].length); + assertEquals("abc", data[0][0]); + assertEquals("def\nghi", data[0][1]); + assertEquals("jkl", data[1][0]); } public void testParse4() throws IOException { - String[][] data = CSVUtils.parse("abc,\"def\\\\nghi\"\njkl"); - assertEquals(2, data.length); - assertEquals(2, data[0].length); - assertEquals(1, data[1].length); - assertEquals("abc", data[0][0]); - // an escape char in quotes only escapes a delimiter, not itself - assertEquals("def\\\\nghi", data[0][1]); - assertEquals("jkl", data[1][0]); + String[][] data = CSVUtils.parse("abc,\"def\\\\nghi\"\njkl"); + assertEquals(2, data.length); + assertEquals(2, data[0].length); + assertEquals(1, data[1].length); + assertEquals("abc", data[0][0]); + // an escape char in quotes only escapes a delimiter, not itself + assertEquals("def\\\\nghi", data[0][1]); + assertEquals("jkl", data[1][0]); } public void testParse5() throws IOException { - String[][] data = CSVUtils.parse("abc,def\\nghi\njkl"); - assertEquals(2, data.length); - assertEquals(2, data[0].length); - assertEquals(1, data[1].length); - assertEquals("abc", data[0][0]); - assertEquals("def\\nghi", data[0][1]); - assertEquals("jkl", data[1][0]); + String[][] data = CSVUtils.parse("abc,def\\nghi\njkl"); + assertEquals(2, data.length); + assertEquals(2, data[0].length); + assertEquals(1, data[1].length); + assertEquals("abc", data[0][0]); + assertEquals("def\\nghi", data[0][1]); + assertEquals("jkl", data[1][0]); } - + public void testParse6() throws IOException { - String[][] data = CSVUtils.parse(""); - // default strategy is CSV, which ignores empty lines - assertEquals(0, data.length); + String[][] data = CSVUtils.parse(""); + // default strategy is CSV, which ignores empty lines + assertEquals(0, data.length); } - + public void testParse7() throws IOException { - boolean io = false; - try { - CSVUtils.parse(null); - } catch (IllegalArgumentException e) { - io = true; - } - assertTrue(io); + boolean io = false; + try { + CSVUtils.parse(null); + } catch (IllegalArgumentException e) { + io = true; + } + assertTrue(io); } - + public void testParseLine1() throws IOException { - String[] data = CSVUtils.parseLine("abc,def,ghi"); - assertEquals(3, data.length); - assertEquals("abc", data[0]); - assertEquals("def", data[1]); - assertEquals("ghi", data[2]); + String[] data = CSVUtils.parseLine("abc,def,ghi"); + assertEquals(3, data.length); + assertEquals("abc", data[0]); + assertEquals("def", data[1]); + assertEquals("ghi", data[2]); } public void testParseLine2() throws IOException { - String[] data = CSVUtils.parseLine("abc,def,ghi\n"); - assertEquals(3, data.length); - assertEquals("abc", data[0]); - assertEquals("def", data[1]); - assertEquals("ghi", data[2]); + String[] data = CSVUtils.parseLine("abc,def,ghi\n"); + assertEquals(3, data.length); + assertEquals("abc", data[0]); + assertEquals("def", data[1]); + assertEquals("ghi", data[2]); } public void testParseLine3() throws IOException { - String[] data = CSVUtils.parseLine("abc,\"def,ghi\""); - assertEquals(2, data.length); - assertEquals("abc", data[0]); - assertEquals("def,ghi", data[1]); + String[] data = CSVUtils.parseLine("abc,\"def,ghi\""); + assertEquals(2, data.length); + assertEquals("abc", data[0]); + assertEquals("def,ghi", data[1]); } public void testParseLine4() throws IOException { - String[] data = CSVUtils.parseLine("abc,\"def\nghi\""); - assertEquals(2, data.length); - assertEquals("abc", data[0]); - assertEquals("def\nghi", data[1]); + String[] data = CSVUtils.parseLine("abc,\"def\nghi\""); + assertEquals(2, data.length); + assertEquals("abc", data[0]); + assertEquals("def\nghi", data[1]); } - + public void testParseLine5() throws IOException { - String[] data = CSVUtils.parseLine(""); - assertEquals(0, data.length); - // assertEquals("", data[0]); + String[] data = CSVUtils.parseLine(""); + assertEquals(0, data.length); + // assertEquals("", data[0]); } - + public void testParseLine6() throws IOException { - boolean io = false; - try { - CSVUtils.parseLine(null); - } catch (IllegalArgumentException e) { - io = true; - } - assertTrue(io); + boolean io = false; + try { + CSVUtils.parseLine(null); + } catch (IllegalArgumentException e) { + io = true; + } + assertTrue(io); } - + public void testParseLine7() throws IOException { - String[] res = CSVUtils.parseLine(""); - assertNotNull(res); - assertEquals(0, res.length); + String[] res = CSVUtils.parseLine(""); + assertNotNull(res); + assertEquals(0, res.length); } - + } diff --git a/src/test/org/apache/commons/csv/CharBufferTest.java b/src/test/org/apache/commons/csv/CharBufferTest.java index dc0d758e..24767445 100644 --- a/src/test/org/apache/commons/csv/CharBufferTest.java +++ b/src/test/org/apache/commons/csv/CharBufferTest.java @@ -21,7 +21,6 @@ package org.apache.commons.csv; import junit.framework.TestCase; /** - * * @author Ortwin Glück */ public class CharBufferTest extends TestCase { @@ -31,14 +30,14 @@ public class CharBufferTest extends TestCase { try { cb = new CharBuffer(0); fail("Should not be possible"); - } catch(IllegalArgumentException e) { + } catch (IllegalArgumentException e) { // expected } - + cb = new CharBuffer(128); assertEquals(0, cb.length()); } - + public void testAppendChar() { CharBuffer cb = new CharBuffer(1); String expected = ""; @@ -49,59 +48,59 @@ public class CharBufferTest extends TestCase { assertEquals(expected.length(), cb.length()); } } - + public void testAppendCharArray() { CharBuffer cb = new CharBuffer(1); char[] abcd = "abcd".toCharArray(); String expected = ""; - for (int i=0; i<10; i++) { + for (int i = 0; i < 10; i++) { cb.append(abcd); expected += "abcd"; assertEquals(expected, cb.toString()); - assertEquals(4*(i+1), cb.length()); + assertEquals(4 * (i + 1), cb.length()); } } - + public void testAppendString() { CharBuffer cb = new CharBuffer(1); String abcd = "abcd"; String expected = ""; - for (int i=0; i<10; i++) { + for (int i = 0; i < 10; i++) { cb.append(abcd); expected += abcd; assertEquals(expected, cb.toString()); - assertEquals(4*(i+1), cb.length()); + assertEquals(4 * (i + 1), cb.length()); } } - + public void testAppendStringBuffer() { CharBuffer cb = new CharBuffer(1); StringBuffer abcd = new StringBuffer("abcd"); String expected = ""; - for (int i=0; i<10; i++) { + for (int i = 0; i < 10; i++) { cb.append(abcd); expected += "abcd"; assertEquals(expected, cb.toString()); - assertEquals(4*(i+1), cb.length()); + assertEquals(4 * (i + 1), cb.length()); } } - + public void testAppendCharBuffer() { CharBuffer cb = new CharBuffer(1); CharBuffer abcd = new CharBuffer(17); abcd.append("abcd"); String expected = ""; - for (int i=0; i<10; i++) { + for (int i = 0; i < 10; i++) { cb.append(abcd); expected += "abcd"; assertEquals(expected, cb.toString()); - assertEquals(4*(i+1), cb.length()); + assertEquals(4 * (i + 1), cb.length()); } } - + public void testShrink() { String data = "123456789012345678901234567890"; - + CharBuffer cb = new CharBuffer(data.length() + 100); assertEquals(data.length() + 100, cb.capacity()); cb.append(data); @@ -112,24 +111,24 @@ public class CharBufferTest extends TestCase { assertEquals(data.length(), cb.length()); assertEquals(data, cb.toString()); } - + //-- the following test cases have been adapted from the HttpComponents project //-- written by Oleg Kalnichevski - + public void testSimpleAppend() throws Exception { CharBuffer buffer = new CharBuffer(16); - assertEquals(16, buffer.capacity()); + assertEquals(16, buffer.capacity()); assertEquals(0, buffer.length()); char[] b1 = buffer.getCharacters(); assertNotNull(b1); assertEquals(0, b1.length); assertEquals(0, buffer.length()); - - char[] tmp = new char[] { '1', '2', '3', '4'}; + + char[] tmp = new char[]{'1', '2', '3', '4'}; buffer.append(tmp); - assertEquals(16, buffer.capacity()); + assertEquals(16, buffer.capacity()); assertEquals(4, buffer.length()); - + char[] b2 = buffer.getCharacters(); assertNotNull(b2); assertEquals(4, b2.length); @@ -137,35 +136,35 @@ public class CharBufferTest extends TestCase { assertEquals(tmp[i], b2[i]); } assertEquals("1234", buffer.toString()); - + buffer.clear(); - assertEquals(16, buffer.capacity()); + assertEquals(16, buffer.capacity()); assertEquals(0, buffer.length()); } - + public void testAppendString2() throws Exception { CharBuffer buffer = new CharBuffer(8); buffer.append("stuff"); buffer.append(" and more stuff"); assertEquals("stuff and more stuff", buffer.toString()); } - + public void testAppendNull() throws Exception { CharBuffer buffer = new CharBuffer(8); - - buffer.append((StringBuffer)null); - assertEquals("", buffer.toString()); - - buffer.append((String)null); + + buffer.append((StringBuffer) null); assertEquals("", buffer.toString()); - buffer.append((CharBuffer)null); + buffer.append((String) null); assertEquals("", buffer.toString()); - buffer.append((char[])null); + buffer.append((CharBuffer) null); + assertEquals("", buffer.toString()); + + buffer.append((char[]) null); assertEquals("", buffer.toString()); } - + public void testAppendCharArrayBuffer() throws Exception { CharBuffer buffer1 = new CharBuffer(8); buffer1.append(" and more stuff"); @@ -174,7 +173,7 @@ public class CharBufferTest extends TestCase { buffer2.append(buffer1); assertEquals("stuff and more stuff", buffer2.toString()); } - + public void testAppendSingleChar() throws Exception { CharBuffer buffer = new CharBuffer(4); buffer.append('1'); @@ -185,7 +184,7 @@ public class CharBufferTest extends TestCase { buffer.append('6'); assertEquals("123456", buffer.toString()); } - + public void testProvideCapacity() throws Exception { CharBuffer buffer = new CharBuffer(4); buffer.provideCapacity(2); diff --git a/src/test/org/apache/commons/csv/ExtendedBufferedReaderTest.java b/src/test/org/apache/commons/csv/ExtendedBufferedReaderTest.java index 5f4ce2d3..82271429 100644 --- a/src/test/org/apache/commons/csv/ExtendedBufferedReaderTest.java +++ b/src/test/org/apache/commons/csv/ExtendedBufferedReaderTest.java @@ -25,139 +25,138 @@ import junit.framework.TestSuite; /** * ExtendedBufferedReaderTest - * */ public class ExtendedBufferedReaderTest extends TestCase { - // ====================================================== - // the test cases - // ====================================================== - - public void testConstructors() { - ExtendedBufferedReader br = new ExtendedBufferedReader(new StringReader("")); - br = new ExtendedBufferedReader(new StringReader(""), 10); - } - - public void testReadLookahead1() throws Exception { - - assertEquals(ExtendedBufferedReader.END_OF_STREAM, getEBR("").read()); - ExtendedBufferedReader br = getEBR("1\n2\r3\n"); - assertEquals('1', br.lookAhead()); - assertEquals(ExtendedBufferedReader.UNDEFINED, br.readAgain()); - assertEquals('1', br.read()); - assertEquals('1', br.readAgain()); + // ====================================================== + // the test cases + // ====================================================== - assertEquals(0, br.getLineNumber()); - assertEquals('\n', br.lookAhead()); - assertEquals(0, br.getLineNumber()); - assertEquals('1', br.readAgain()); - assertEquals('\n', br.read()); - assertEquals(1, br.getLineNumber()); - assertEquals('\n', br.readAgain()); - assertEquals(1, br.getLineNumber()); - - assertEquals('2', br.lookAhead()); - assertEquals(1, br.getLineNumber()); - assertEquals('\n', br.readAgain()); - assertEquals(1, br.getLineNumber()); - assertEquals('2', br.read()); - assertEquals('2', br.readAgain()); - - assertEquals('\r', br.lookAhead()); - assertEquals('2', br.readAgain()); - assertEquals('\r', br.read()); - assertEquals('\r', br.readAgain()); - - assertEquals('3', br.lookAhead()); - assertEquals('\r', br.readAgain()); - assertEquals('3', br.read()); - assertEquals('3', br.readAgain()); - - assertEquals('\n', br.lookAhead()); - assertEquals(1, br.getLineNumber()); - assertEquals('3', br.readAgain()); - assertEquals('\n', br.read()); - assertEquals(2, br.getLineNumber()); - assertEquals('\n', br.readAgain()); - assertEquals(2, br.getLineNumber()); - - assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.lookAhead()); - assertEquals('\n', br.readAgain()); - assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.read()); - assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.readAgain()); - assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.read()); - assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.lookAhead()); - - } - + public void testConstructors() { + ExtendedBufferedReader br = new ExtendedBufferedReader(new StringReader("")); + br = new ExtendedBufferedReader(new StringReader(""), 10); + } - public void testReadLookahead2() throws Exception { - char[] ref = new char[5]; - char[] res = new char[5]; - - ExtendedBufferedReader br = getEBR(""); - assertEquals(0, br.read(res, 0, 0)); - assertTrue(Arrays.equals(res, ref)); - - br = getEBR("abcdefg"); - ref[0] = 'a'; - ref[1] = 'b'; - ref[2] = 'c'; - assertEquals(3, br.read(res, 0, 3)); - assertTrue(Arrays.equals(res, ref)); - assertEquals('c', br.readAgain()); - - assertEquals('d', br.lookAhead()); - ref[4] = 'd'; - assertEquals(1, br.read(res, 4, 1)); - assertTrue(Arrays.equals(res, ref)); - assertEquals('d', br.readAgain()); - - } - - public void testReadLine() throws Exception { - ExtendedBufferedReader br = getEBR(""); - assertTrue(br.readLine() == null); - - br = getEBR("\n"); - assertTrue(br.readLine().equals("")); - assertTrue(br.readLine() == null); - - br = getEBR("foo\n\nhello"); - assertEquals(0, br.getLineNumber()); - assertTrue(br.readLine().equals("foo")); - assertEquals(1, br.getLineNumber()); - assertTrue(br.readLine().equals("")); - assertEquals(2, br.getLineNumber()); - assertTrue(br.readLine().equals("hello")); - assertEquals(3, br.getLineNumber()); - assertTrue(br.readLine() == null); - assertEquals(3, br.getLineNumber()); - - br = getEBR("foo\n\nhello"); - assertEquals('f', br.read()); - assertEquals('o', br.lookAhead()); - assertTrue(br.readLine().equals("oo")); - assertEquals(1, br.getLineNumber()); - assertEquals('\n', br.lookAhead()); - assertTrue(br.readLine().equals("")); - assertEquals(2, br.getLineNumber()); - assertEquals('h', br.lookAhead()); - assertTrue(br.readLine().equals("hello")); - assertTrue(br.readLine() == null); - assertEquals(3, br.getLineNumber()); - - - br = getEBR("foo\rbaar\r\nfoo"); - assertTrue(br.readLine().equals("foo")); - assertEquals('b', br.lookAhead()); - assertTrue(br.readLine().equals("baar")); - assertEquals('f', br.lookAhead()); - assertTrue(br.readLine().equals("foo")); - assertTrue(br.readLine() == null); - } - - private ExtendedBufferedReader getEBR(String s) { - return new ExtendedBufferedReader(new StringReader(s)); - } + public void testReadLookahead1() throws Exception { + + assertEquals(ExtendedBufferedReader.END_OF_STREAM, getEBR("").read()); + ExtendedBufferedReader br = getEBR("1\n2\r3\n"); + assertEquals('1', br.lookAhead()); + assertEquals(ExtendedBufferedReader.UNDEFINED, br.readAgain()); + assertEquals('1', br.read()); + assertEquals('1', br.readAgain()); + + assertEquals(0, br.getLineNumber()); + assertEquals('\n', br.lookAhead()); + assertEquals(0, br.getLineNumber()); + assertEquals('1', br.readAgain()); + assertEquals('\n', br.read()); + assertEquals(1, br.getLineNumber()); + assertEquals('\n', br.readAgain()); + assertEquals(1, br.getLineNumber()); + + assertEquals('2', br.lookAhead()); + assertEquals(1, br.getLineNumber()); + assertEquals('\n', br.readAgain()); + assertEquals(1, br.getLineNumber()); + assertEquals('2', br.read()); + assertEquals('2', br.readAgain()); + + assertEquals('\r', br.lookAhead()); + assertEquals('2', br.readAgain()); + assertEquals('\r', br.read()); + assertEquals('\r', br.readAgain()); + + assertEquals('3', br.lookAhead()); + assertEquals('\r', br.readAgain()); + assertEquals('3', br.read()); + assertEquals('3', br.readAgain()); + + assertEquals('\n', br.lookAhead()); + assertEquals(1, br.getLineNumber()); + assertEquals('3', br.readAgain()); + assertEquals('\n', br.read()); + assertEquals(2, br.getLineNumber()); + assertEquals('\n', br.readAgain()); + assertEquals(2, br.getLineNumber()); + + assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.lookAhead()); + assertEquals('\n', br.readAgain()); + assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.read()); + assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.readAgain()); + assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.read()); + assertEquals(ExtendedBufferedReader.END_OF_STREAM, br.lookAhead()); + + } + + + public void testReadLookahead2() throws Exception { + char[] ref = new char[5]; + char[] res = new char[5]; + + ExtendedBufferedReader br = getEBR(""); + assertEquals(0, br.read(res, 0, 0)); + assertTrue(Arrays.equals(res, ref)); + + br = getEBR("abcdefg"); + ref[0] = 'a'; + ref[1] = 'b'; + ref[2] = 'c'; + assertEquals(3, br.read(res, 0, 3)); + assertTrue(Arrays.equals(res, ref)); + assertEquals('c', br.readAgain()); + + assertEquals('d', br.lookAhead()); + ref[4] = 'd'; + assertEquals(1, br.read(res, 4, 1)); + assertTrue(Arrays.equals(res, ref)); + assertEquals('d', br.readAgain()); + + } + + public void testReadLine() throws Exception { + ExtendedBufferedReader br = getEBR(""); + assertTrue(br.readLine() == null); + + br = getEBR("\n"); + assertTrue(br.readLine().equals("")); + assertTrue(br.readLine() == null); + + br = getEBR("foo\n\nhello"); + assertEquals(0, br.getLineNumber()); + assertTrue(br.readLine().equals("foo")); + assertEquals(1, br.getLineNumber()); + assertTrue(br.readLine().equals("")); + assertEquals(2, br.getLineNumber()); + assertTrue(br.readLine().equals("hello")); + assertEquals(3, br.getLineNumber()); + assertTrue(br.readLine() == null); + assertEquals(3, br.getLineNumber()); + + br = getEBR("foo\n\nhello"); + assertEquals('f', br.read()); + assertEquals('o', br.lookAhead()); + assertTrue(br.readLine().equals("oo")); + assertEquals(1, br.getLineNumber()); + assertEquals('\n', br.lookAhead()); + assertTrue(br.readLine().equals("")); + assertEquals(2, br.getLineNumber()); + assertEquals('h', br.lookAhead()); + assertTrue(br.readLine().equals("hello")); + assertTrue(br.readLine() == null); + assertEquals(3, br.getLineNumber()); + + + br = getEBR("foo\rbaar\r\nfoo"); + assertTrue(br.readLine().equals("foo")); + assertEquals('b', br.lookAhead()); + assertTrue(br.readLine().equals("baar")); + assertEquals('f', br.lookAhead()); + assertTrue(br.readLine().equals("foo")); + assertTrue(br.readLine() == null); + } + + private ExtendedBufferedReader getEBR(String s) { + return new ExtendedBufferedReader(new StringReader(s)); + } } diff --git a/src/test/org/apache/commons/csv/writer/CSVConfigGuesserTest.java b/src/test/org/apache/commons/csv/writer/CSVConfigGuesserTest.java index 90000080..d2d18e4e 100644 --- a/src/test/org/apache/commons/csv/writer/CSVConfigGuesserTest.java +++ b/src/test/org/apache/commons/csv/writer/CSVConfigGuesserTest.java @@ -41,10 +41,10 @@ public class CSVConfigGuesserTest extends TestCase { guesser.setHasFieldHeader(true); assertEquals(true, guesser.hasFieldHeader()); } + /** * Test a format like - * 1234 ; abcd ; 1234 ; - * + * 1234 ; abcd ; 1234 ; */ public void testConfigGuess1() { CSVConfig expected = new CSVConfig(); @@ -67,11 +67,11 @@ public class CSVConfigGuesserTest extends TestCase { assertEquals(expected.getFields().length, guessed.getFields().length); assertEquals(expected.getFields()[0].getSize(), guessed.getFields()[0].getSize()); } + /** * Test a format like - * 1234,123123,12312312,213123 - * 1,2,3,4 - * + * 1234,123123,12312312,213123 + * 1,2,3,4 */ public void testConfigGuess2() { CSVConfig expected = new CSVConfig(); diff --git a/src/test/org/apache/commons/csv/writer/CSVConfigTest.java b/src/test/org/apache/commons/csv/writer/CSVConfigTest.java index 0835776c..482167ec 100644 --- a/src/test/org/apache/commons/csv/writer/CSVConfigTest.java +++ b/src/test/org/apache/commons/csv/writer/CSVConfigTest.java @@ -29,7 +29,7 @@ import junit.framework.TestCase; * @version $Id: $ */ public class CSVConfigTest extends TestCase { - + public void testFixedWith() { CSVConfig config = new CSVConfig(); @@ -37,13 +37,13 @@ public class CSVConfigTest extends TestCase { config.setFixedWidth(true); assertEquals(true, config.isFixedWidth()); } - + public void testFields() { CSVConfig config = new CSVConfig(); assertEquals(0, config.getFields().length); - config.setFields((CSVField[])null); + config.setFields((CSVField[]) null); assertEquals(0, config.getFields().length); - config.setFields((Collection)null); + config.setFields((Collection) null); assertEquals(0, config.getFields().length); CSVField field = new CSVField(); field.setName("field1"); @@ -53,7 +53,7 @@ public class CSVConfigTest extends TestCase { assertEquals(null, config.getField("field11")); assertEquals(field, config.getField("field1")); } - + public void testFill() { CSVConfig config = new CSVConfig(); assertEquals(CSVConfig.FILLNONE, config.getFill()); @@ -65,7 +65,7 @@ public class CSVConfigTest extends TestCase { config.setFillChar('m'); assertEquals('m', config.getFillChar()); } - + public void testDelimiter() { CSVConfig config = new CSVConfig(); assertEquals(',', config.getDelimiter()); @@ -75,7 +75,7 @@ public class CSVConfigTest extends TestCase { config.setIgnoreDelimiter(true); assertEquals(true, config.isDelimiterIgnored()); } - + public void testValueDelimiter() { CSVConfig config = new CSVConfig(); assertEquals('"', config.getValueDelimiter()); @@ -85,14 +85,14 @@ public class CSVConfigTest extends TestCase { config.setIgnoreValueDelimiter(false); assertEquals(false, config.isValueDelimiterIgnored()); } - + public void testFieldHeader() { CSVConfig config = new CSVConfig(); assertEquals(false, config.isFieldHeader()); config.setFieldHeader(true); assertEquals(true, config.isFieldHeader()); } - + public void testTrimEnd() { CSVConfig config = new CSVConfig(); assertEquals(false, config.isEndTrimmed()); diff --git a/src/test/org/apache/commons/csv/writer/CSVFieldTest.java b/src/test/org/apache/commons/csv/writer/CSVFieldTest.java index a5d216f6..df5796b4 100644 --- a/src/test/org/apache/commons/csv/writer/CSVFieldTest.java +++ b/src/test/org/apache/commons/csv/writer/CSVFieldTest.java @@ -21,7 +21,6 @@ package org.apache.commons.csv.writer; import junit.framework.TestCase; /** - * * @author Martin van den Bemt * @version $Id: $ */ @@ -41,7 +40,7 @@ public class CSVFieldTest extends TestCase { assertEquals("name", field.getName()); assertEquals(10, field.getSize()); } - + public void testFill() { CSVField field = new CSVField(); assertEquals(CSVConfig.FILLNONE, field.getFill()); diff --git a/src/test/org/apache/commons/csv/writer/CSVWriterTest.java b/src/test/org/apache/commons/csv/writer/CSVWriterTest.java index 39b989b6..9de5228a 100644 --- a/src/test/org/apache/commons/csv/writer/CSVWriterTest.java +++ b/src/test/org/apache/commons/csv/writer/CSVWriterTest.java @@ -26,17 +26,17 @@ import junit.framework.TestCase; /** * The testcase for the csv writer. - * + * * @author Martin van den Bemt * @version $Id: $ */ public class CSVWriterTest extends TestCase { private Map map; - + protected void setUp() throws Exception { super.setUp(); - + map = new HashMap(); map.put("field1", "12345"); map.put("field2", "1234");