diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 90515c8c..acd88fa6 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -46,7 +46,7 @@ public class CSVFormat implements Cloneable, Serializable { */ public static final char DISABLED = '\ufffe'; - /** Standard comma separated format as defined by RFC 4180. */ + /** Standard comma separated format as defined by RFC 4180. */ public static final CSVFormat DEFAULT = new CSVFormat(',', '"', DISABLED, DISABLED, true, true, false, true); /** Excel file format (using a comma as the value delimiter). */ @@ -57,26 +57,33 @@ public class CSVFormat implements Cloneable, Serializable { /** - * Creates a CSVFormat with the default parameters. + * Creates a CSV format with the default parameters. */ public CSVFormat() { } + /** + * Creates a customized CSV format. + * + * @param delimiter the char used for value separation + * @param encapsulator the char used as value encapsulation marker + * @param commentStart the char used for comment identification + */ public CSVFormat(char delimiter, char encapsulator, char commentStart) { this(delimiter, encapsulator, commentStart, DISABLED, true, true, false, true); } /** - * Customized CSV format constructor. + * Creates a customized CSV format. 
* - * @param delimiter a char used for value separation - * @param encapsulator a char used as value encapsulation marker - * @param commentStart a char used for comment identification - * @param escape a char used to escape special characters in values - * @param leadingSpacesIgnored TRUE when leading whitespaces should be ignored - * @param trailingSpacesIgnored TRUE when trailing whitespaces should be ignored - * @param unicodeEscapesInterpreted TRUE when unicode escapes should be interpreted - * @param emptyLinesIgnored TRUE when the parser should skip emtpy lines + * @param delimiter the char used for value separation + * @param encapsulator the char used as value encapsulation marker + * @param commentStart the char used for comment identification + * @param escape the char used to escape special characters in values + * @param leadingSpacesIgnored true when leading whitespaces should be ignored + * @param trailingSpacesIgnored true when trailing whitespaces should be ignored + * @param unicodeEscapesInterpreted true when unicode escapes should be interpreted + * @param emptyLinesIgnored true when the parser should skip empty lines */ public CSVFormat( char delimiter, diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 88776d22..7b5b849f 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -64,15 +64,14 @@ public class CSVParser implements Iterable { /** Immutable empty String array. */ private static final String[] EMPTY_STRING_ARRAY = new String[0]; - // the input stream + /** The input stream */ private final ExtendedBufferedReader in; private final CSVFormat format; // the following objects are shared to reduce garbage - /** - * A record buffer for getLine(). Grows as necessary and is reused. - */ + + /** A record buffer for getLine(). Grows as necessary and is reused.
*/ private final List record = new ArrayList(); private final Token reusableToken = new Token(); private final CharBuffer wsBuf = new CharBuffer(); @@ -146,12 +145,10 @@ public class CSVParser implements Iterable { // ====================================================== /** - * Parses the CSV according to the given format - * and returns the content as an array of records - * (whereas records are arrays of single values). + * Parses the CSV according to the given format and returns the content + * as an array of records (whereas records are arrays of single values). *

- * The returned content starts at the current parse-position in - * the stream. + * The returned content starts at the current parse-position in the stream. * * @return matrix of records x values ('null' when end of file) * @throws IOException on parse error or input read-failure @@ -171,11 +168,9 @@ public class CSVParser implements Iterable { } /** - * Parses from the current point in the stream til - * the end of the current line. + * Parses from the current point in the stream until the end of the current line. * - * @return array of values til end of line - * ('null' when end of file has been reached) + * @return array of values until end of line ('null' when end of file has been reached) * @throws IOException on parse error or input read-failure */ String[] getLine() throws IOException { @@ -209,7 +204,7 @@ public class CSVParser implements Iterable { } } if (!record.isEmpty()) { - ret = (String[]) record.toArray(new String[record.size()]); + ret = record.toArray(new String[record.size()]); } return ret; } @@ -283,11 +278,9 @@ public class CSVParser implements Iterable { /** * Returns the next token. *

- * A token corresponds to a term, a record change or an - * end-of-file indicator. + * A token corresponds to a term, a record change or an end-of-file indicator. * - * @param tkn an existing Token object to reuse. The caller is responsible to initialize the - * Token. + * @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token. * @return the next token found * @throws IOException on stream access error */ @@ -380,9 +373,9 @@ public class CSVParser implements Iterable { * A simple token might contain escaped delimiters (as \, or \;). The * token is finished when one of the following conditions become true: *

    - *
  • end of line has been reached (EORECORD)
  • - *
  • end of stream has been reached (EOF)
  • - *
  • an unescaped delimiter has been reached (TOKEN)
  • + *
  • end of line has been reached (EORECORD)
  • + *
  • end of stream has been reached (EOF)
  • + *
  • an unescaped delimiter has been reached (TOKEN)
  • *
* * @param tkn the current token @@ -476,19 +469,13 @@ public class CSVParser implements Iterable { return tkn; } else if (!isWhitespace(c)) { // error invalid char between token and next delimiter - throw new IOException( - "(line " + getLineNumber() - + ") invalid char between encapsulated token end delimiter" - ); + throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter"); } } } } else if (isEndOfFile(c)) { // error condition (end of file before end of token) - throw new IOException( - "(startline " + startLineNumber + ")" - + "eof reached before encapsulated token finished" - ); + throw new IOException("(startline " + startLineNumber + ") EOF reached before encapsulated token finished"); } else { // consume character tkn.content.append((char) c); @@ -500,8 +487,7 @@ public class CSVParser implements Iterable { /** * Decodes Unicode escapes. *

- * Interpretation of "\\uXXXX" escape sequences - * where XXXX is a hex-number. + * Interpretation of "\\uXXXX" escape sequences where XXXX is a hex-number. * * @param c current char which is discarded because it's the "\\" of "\\uXXXX" * @return the decoded character @@ -555,10 +541,6 @@ public class CSVParser implements Iterable { return out; } - // ====================================================== - // strategies - // ====================================================== - /** * Obtain the specified CSV format. * diff --git a/src/main/java/org/apache/commons/csv/package.html b/src/main/java/org/apache/commons/csv/package.html index 9a3b27d7..44a78382 100644 --- a/src/main/java/org/apache/commons/csv/package.html +++ b/src/main/java/org/apache/commons/csv/package.html @@ -35,7 +35,7 @@ record := values* -

The following list contains the csv aspects the WAKE CSV parser supports:

+

The following list contains the csv aspects the Commons CSV parser supports:

Separators (for lines)
The record separators are hardcoded and cannot be changed. The must be '\n' or '\r\n'.
@@ -76,9 +76,11 @@

Example usage:

-String[] parsedLine = CSVParser.parseLine("a,b,c");
-for (int i = 0; i < parsedLine.length; ++i) {
-  System.out.println("value " + i + "=" + parsedLine[i]);
+Reader in = new StringReader("a,b,c");
+for (String[] line : CSVFormat.DEFAULT.parse(in)) {
+    for (int i = 0; i < line.length; i++) {
+        System.out.println("value " + i + "=" + line[i]);
+    }
 }