diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 5e41cb5c..a2526ab2 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -24,7 +24,7 @@ import java.io.StringWriter; /** * The format specification of a CSV file. - * + * * This class is immutable. */ public class CSVFormat implements Serializable { @@ -48,10 +48,9 @@ public class CSVFormat implements Serializable { private final boolean isEncapsulating; /** - * Constant char to be used for disabling comments, escapes and encapsulation. - * The value -2 is used because it won't be confused with an EOF signal (-1), - * and because the unicode value FFFE would be encoded as two chars (using surrogates) - * and thus there should never be a collision with a real text char. + * Constant char to be used for disabling comments, escapes and encapsulation. The value -2 is used because it + * won't be confused with an EOF signal (-1), and because the unicode value FFFE would be encoded as two chars + * (using surrogates) and thus there should never be a collision with a real text char. */ static final char DISABLED = '\ufffe'; @@ -92,15 +91,14 @@ public class CSVFormat implements Serializable { .withLineSeparator(CRLF); /** - * Excel file format (using a comma as the value delimiter). - * Note that the actual value delimiter used by Excel is locale dependent, - * it might be necessary to customize this format to accomodate to your - * regional settings. + * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is + * locale dependent, it might be necessary to customize this format to accomodate to your regional settings. *

- * For example for parsing or generating a CSV file on a French system - * the following format will be used: - * - *

CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
+ * For example for parsing or generating a CSV file on a French system the following format will be used: + * + * <pre>
+     * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
+     * </pre>
*/ public static final CSVFormat EXCEL = PRISTINE @@ -118,12 +116,12 @@ public class CSVFormat implements Serializable { .withLineSeparator(CRLF); /** - * Default MySQL format used by the SELECT INTO OUTFILE and - * LOAD DATA INFILE operations. This is a tab-delimited - * format with a LF character as the line separator. Values are not quoted - * and special characters are escaped with '\'. - * - * @see http://dev.mysql.com/doc/refman/5.1/en/load-data.html + * Default MySQL format used by the SELECT INTO OUTFILE and LOAD DATA INFILE operations. This is + * a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters + * are escaped with '\'. + * + * @see http://dev.mysql.com/doc/refman/5.1/en/load-data.html */ public static final CSVFormat MYSQL = PRISTINE @@ -134,25 +132,26 @@ public class CSVFormat implements Serializable { /** * Creates a customized CSV format. - * - * @param delimiter the char used for value separation - * @param encapsulator the char used as value encapsulation marker - * @param commentStart the char used for comment identification - * @param escape the char used to escape special characters in values - * @param surroundingSpacesIgnored true when whitespaces enclosing values should be ignored - * @param emptyLinesIgnored true when the parser should skip emtpy lines - * @param lineSeparator the line separator to use for output - * @param header the header + * + * @param delimiter + * the char used for value separation + * @param encapsulator + * the char used as value encapsulation marker + * @param commentStart + * the char used for comment identification + * @param escape + * the char used to escape special characters in values + * @param surroundingSpacesIgnored + * true when whitespaces enclosing values should be ignored + * @param emptyLinesIgnored + * true when the parser should skip emtpy lines + * @param lineSeparator + * the line separator to use for output + * @param header + * the 
header */ - CSVFormat( - char delimiter, - char encapsulator, - char commentStart, - char escape, - boolean surroundingSpacesIgnored, - boolean emptyLinesIgnored, - String lineSeparator, - String[] header) { + CSVFormat(char delimiter, char encapsulator, char commentStart, char escape, boolean surroundingSpacesIgnored, + boolean emptyLinesIgnored, String lineSeparator, String[] header) { this.delimiter = delimiter; this.encapsulator = encapsulator; this.commentStart = commentStart; @@ -168,9 +167,10 @@ public class CSVFormat implements Serializable { /** * Returns true if the given character is a line break character. - * - * @param c the character to check - * + * + * @param c + * the character to check + * * @return true if c is a line break character */ private static boolean isLineBreak(char c) { @@ -182,29 +182,34 @@ public class CSVFormat implements Serializable { */ void validate() throws IllegalArgumentException { if (delimiter == encapsulator) { - throw new IllegalArgumentException("The encapsulator character and the delimiter cannot be the same (\"" + encapsulator + "\")"); + throw new IllegalArgumentException("The encapsulator character and the delimiter cannot be the same (\"" + + encapsulator + "\")"); } if (delimiter == escape) { - throw new IllegalArgumentException("The escape character and the delimiter cannot be the same (\"" + escape + "\")"); + throw new IllegalArgumentException("The escape character and the delimiter cannot be the same (\"" + + escape + "\")"); } if (delimiter == commentStart) { - throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same (\"" + commentStart + "\")"); + throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same (\"" + + commentStart + "\")"); } if (encapsulator != DISABLED && encapsulator == commentStart) { - throw new IllegalArgumentException("The comment start character and the encapsulator cannot be the same (\"" + 
commentStart + "\")"); + throw new IllegalArgumentException( + "The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")"); } if (escape != DISABLED && escape == commentStart) { - throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\"" + commentStart + "\")"); + throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\"" + + commentStart + "\")"); } } /** * Returns the character delimiting the values (typically ';', ',' or '\t'). - * + * * @return the delimiter character */ public char getDelimiter() { @@ -213,22 +218,25 @@ public class CSVFormat implements Serializable { /** * Returns a copy of this format using the specified delimiter character. - * - * @param delimiter the delimiter character + * + * @param delimiter + * the delimiter character * @return A copy of this format using the specified delimiter character - * @throws IllegalArgumentException thrown if the specified character is a line break + * @throws IllegalArgumentException + * thrown if the specified character is a line break */ public CSVFormat withDelimiter(char delimiter) { if (isLineBreak(delimiter)) { throw new IllegalArgumentException("The delimiter cannot be a line break"); } - return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, + emptyLinesIgnored, lineSeparator, header); } /** * Returns the character used to encapsulate values containing special characters. - * + * * @return the encapsulator character */ public char getEncapsulator() { @@ -237,22 +245,25 @@ public class CSVFormat implements Serializable { /** * Returns a copy of this format using the specified encapsulator character. 
- * - * @param encapsulator the encapsulator character + * + * @param encapsulator + * the encapsulator character * @return A copy of this format using the specified encapsulator character - * @throws IllegalArgumentException thrown if the specified character is a line break + * @throws IllegalArgumentException + * thrown if the specified character is a line break */ public CSVFormat withEncapsulator(char encapsulator) { if (isLineBreak(encapsulator)) { throw new IllegalArgumentException("The encapsulator cannot be a line break"); } - return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, + emptyLinesIgnored, lineSeparator, header); } /** * Returns whether an encapsulator has been defined. - * + * * @return {@code true} if an encapsulator is defined */ public boolean isEncapsulating() { @@ -261,7 +272,7 @@ public class CSVFormat implements Serializable { /** * Returns the character marking the start of a line comment. - * + * * @return the comment start marker. */ public char getCommentStart() { @@ -270,28 +281,29 @@ public class CSVFormat implements Serializable { /** * Returns a copy of this format using the specified character as the comment start marker. - * - * Note that the comment introducer character is only recognised - * at the start of a line. - * - * @param commentStart the comment start marker + * + * Note that the comment introducer character is only recognised at the start of a line. 
+ * + * @param commentStart + * the comment start marker * @return A copy of this format using the specified character as the comment start marker - * @throws IllegalArgumentException thrown if the specified character is a line break + * @throws IllegalArgumentException + * thrown if the specified character is a line break */ public CSVFormat withCommentStart(char commentStart) { if (isLineBreak(commentStart)) { throw new IllegalArgumentException("The comment start character cannot be a line break"); } - return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, + emptyLinesIgnored, lineSeparator, header); } /** * Specifies whether comments are supported by this format. - * - * Note that the comment introducer character is only recognised - * at the start of a line. - * + * + * Note that the comment introducer character is only recognised at the start of a line. + * * @return true is comments are supported, false otherwise */ public boolean isCommentingEnabled() { @@ -300,7 +312,7 @@ public class CSVFormat implements Serializable { /** * Returns the escape character. - * + * * @return the escape character */ public char getEscape() { @@ -309,22 +321,25 @@ public class CSVFormat implements Serializable { /** * Returns a copy of this format using the specified escape character. 
- * - * @param escape the escape character + * + * @param escape + * the escape character * @return A copy of this format using the specified escape character - * @throws IllegalArgumentException thrown if the specified character is a line break + * @throws IllegalArgumentException + * thrown if the specified character is a line break */ public CSVFormat withEscape(char escape) { if (isLineBreak(escape)) { throw new IllegalArgumentException("The escape character cannot be a line break"); } - return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, + emptyLinesIgnored, lineSeparator, header); } /** * Returns whether escape are being processed. - * + * * @return {@code true} if escapes are processed */ public boolean isEscaping() { @@ -333,8 +348,9 @@ public class CSVFormat implements Serializable { /** * Specifies whether spaces around values are ignored when parsing input. - * - * @return true if spaces around values are ignored, false if they are treated as part of the value. + * + * @return true if spaces around values are ignored, false if they are treated as part of the + * value. */ public boolean isSurroundingSpacesIgnored() { return surroundingSpacesIgnored; @@ -342,19 +358,22 @@ public class CSVFormat implements Serializable { /** * Returns a copy of this format with the specified trimming behavior. - * - * @param surroundingSpacesIgnored the trimming behavior, true to remove the surrounding spaces, - * false to leave the spaces as is. + * + * @param surroundingSpacesIgnored + * the trimming behavior, true to remove the surrounding spaces, false to leave the + * spaces as is. * @return A copy of this format with the specified trimming behavior. 
*/ public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, + emptyLinesIgnored, lineSeparator, header); } /** * Specifies whether empty lines between records are ignored when parsing input. - * - * @return true if empty lines between records are ignored, false if they are turned into empty records. + * + * @return true if empty lines between records are ignored, false if they are turned into empty + * records. */ public boolean isEmptyLinesIgnored() { return emptyLinesIgnored; @@ -362,18 +381,20 @@ public class CSVFormat implements Serializable { /** * Returns a copy of this format with the specified empty line skipping behavior. - * - * @param emptyLinesIgnored the empty line skipping behavior, true to ignore the empty lines - * between the records, false to translate empty lines to empty records. - * @return A copy of this format with the specified empty line skipping behavior. + * + * @param emptyLinesIgnored + * the empty line skipping behavior, true to ignore the empty lines between the records, + * false to translate empty lines to empty records. + * @return A copy of this format with the specified empty line skipping behavior. */ public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, + emptyLinesIgnored, lineSeparator, header); } /** * Returns the line separator delimiting output records. 
- * + * * @return the line separator */ public String getLineSeparator() { @@ -382,13 +403,15 @@ public class CSVFormat implements Serializable { /** * Returns a copy of this format using the specified output line separator. - * - * @param lineSeparator the line separator to be used for output. - * + * + * @param lineSeparator + * the line separator to be used for output. + * * @return A copy of this format using the specified output line separator */ public CSVFormat withLineSeparator(String lineSeparator) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, + emptyLinesIgnored, lineSeparator, header); } String[] getHeader() { @@ -396,27 +419,34 @@ public class CSVFormat implements Serializable { } /** - * Returns a copy of this format using the specified header. The header can - * either be parsed automatically from the input file with: - * - *
CSVFormat format = aformat.withHeader();
- * + * Returns a copy of this format using the specified header. The header can either be parsed automatically from the + * input file with: + * + * <pre>
+     * CSVFormat format = aformat.withHeader();
+     * </pre>
+ * * or specified manually with: - * - *
CSVFormat format = aformat.withHeader("name", "email", "phone");
- * - * @param header the header, null if disabled, empty if parsed automatically, user specified otherwise. - * + * + * <pre>
+     * CSVFormat format = aformat.withHeader("name", "email", "phone");
+     * </pre>
+ * + * @param header + * the header, null if disabled, empty if parsed automatically, user specified otherwise. + * * @return A copy of this format using the specified header */ public CSVFormat withHeader(String... header) { - return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); + return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, + emptyLinesIgnored, lineSeparator, header); } /** * Parses the specified content. - * - * @param in the input stream + * + * @param in + * the input stream */ public Iterable parse(Reader in) throws IOException { return new CSVParser(in, this); @@ -424,8 +454,9 @@ public class CSVFormat implements Serializable { /** * Format the specified values. - * - * @param values the values to format + * + * @param values + * the values to format */ public String format(String... values) { StringWriter out = new StringWriter(); diff --git a/src/main/java/org/apache/commons/csv/CSVLexer.java b/src/main/java/org/apache/commons/csv/CSVLexer.java index 47d2c180..acd47ecd 100644 --- a/src/main/java/org/apache/commons/csv/CSVLexer.java +++ b/src/main/java/org/apache/commons/csv/CSVLexer.java @@ -32,10 +32,12 @@ class CSVLexer extends Lexer { * Returns the next token. *

* A token corresponds to a term, a record change or an end-of-file indicator. - * - * @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token. + * + * @param tkn + * an existing Token object to reuse. The caller is responsible to initialize the Token. * @return the next token found - * @throws java.io.IOException on stream access error + * @throws java.io.IOException + * on stream access error */ @Override Token nextToken(Token tkn) throws IOException { @@ -43,17 +45,15 @@ class CSVLexer extends Lexer { // get the last read char (required for empty line detection) int lastChar = in.readAgain(); - // read the next char and set eol + // read the next char and set eol int c = in.read(); /* - * Note: - * The following call will swallow LF if c == CR. - * But we don't need to know if the last char - * was CR or LF - they are equivalent here. + * Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF + * - they are equivalent here. */ boolean eol = isEndOfLine(c); - // empty line detection: eol AND (last char was EOL or beginning) + // empty line detection: eol AND (last char was EOL or beginning) if (emptyLinesIgnored) { while (eol && isStartOfLine(lastChar)) { // go on char ahead ... 
@@ -83,7 +83,7 @@ class CSVLexer extends Lexer { return tkn; } - // important: make sure a new char gets consumed in each iteration + // important: make sure a new char gets consumed in each iteration while (tkn.type == INVALID) { // ignore whitespaces at beginning of a token if (surroundingSpacesIgnored) { @@ -99,14 +99,14 @@ class CSVLexer extends Lexer { tkn.type = TOKEN; } else if (eol) { // empty token return EORECORD("") - //noop: tkn.content.append(""); + // noop: tkn.content.append(""); tkn.type = EORECORD; } else if (isEncapsulator(c)) { // consume encapsulated token encapsulatedTokenLexer(tkn); } else if (isEndOfFile(c)) { // end of file return EOF() - //noop: tkn.content.append(""); + // noop: tkn.content.append(""); tkn.type = EOF; tkn.isReady = true; // there is data at EOF } else { @@ -121,19 +121,21 @@ class CSVLexer extends Lexer { /** * A simple token lexer *

- * Simple token are tokens which are not surrounded by encapsulators. - * A simple token might contain escaped delimiters (as \, or \;). The - * token is finished when one of the following conditions become true: + * Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped + * delimiters (as \, or \;). The token is finished when one of the following conditions become true: *

- * - * @param tkn the current token - * @param c the current character + * + * @param tkn + * the current token + * @param c + * the current character * @return the filled token - * @throws IOException on stream access error + * @throws IOException + * on stream access error */ private Token simpleTokenLexer(Token tkn, int c) throws IOException { // Faster to use while(true)+break than while(tkn.type == INVALID) @@ -167,24 +169,22 @@ class CSVLexer extends Lexer { /** * An encapsulated token lexer *

- * Encapsulated tokens are surrounded by the given encapsulating-string. - * The encapsulator itself might be included in the token using a - * doubling syntax (as "", '') or using escaping (as in \", \'). - * Whitespaces before and after an encapsulated token are ignored. - * The token is finished when one of the following conditions become true: + * Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included + * in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after + * an encapsulated token are ignored. The token is finished when one of the following conditions become true: *

+ * + * @param tkn + * the current token * @return a valid token object - * @throws IOException on invalid state: - * EOF before closing encapsulator or invalid character before delimiter or EOL + * @throws IOException + * on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL */ private Token encapsulatedTokenLexer(Token tkn) throws IOException { // save current line number in case needed for IOE @@ -216,13 +216,15 @@ class CSVLexer extends Lexer { return tkn; } else if (!isWhitespace(c)) { // error invalid char between token and next delimiter - throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter"); + throw new IOException("(line " + getLineNumber() + + ") invalid char between encapsulated token and delimiter"); } } } } else if (isEndOfFile(c)) { // error condition (end of file before end of token) - throw new IOException("(startline " + startLineNumber + ") EOF reached before encapsulated token finished"); + throw new IOException("(startline " + startLineNumber + + ") EOF reached before encapsulated token finished"); } else { // consume character tkn.content.append((char) c);