Fix Checkstyle: Format for 120 line length.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1383582 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2012-09-11 19:47:15 +00:00
parent a5d3d4ed7c
commit 93fc1f9363
2 changed files with 179 additions and 146 deletions

View File

@ -24,7 +24,7 @@ import java.io.StringWriter;
/** /**
* The format specification of a CSV file. * The format specification of a CSV file.
* *
* This class is immutable. * This class is immutable.
*/ */
public class CSVFormat implements Serializable { public class CSVFormat implements Serializable {
@ -48,10 +48,9 @@ public class CSVFormat implements Serializable {
private final boolean isEncapsulating; private final boolean isEncapsulating;
/** /**
* Constant char to be used for disabling comments, escapes and encapsulation. * Constant char to be used for disabling comments, escapes and encapsulation. The value -2 is used because it
* The value -2 is used because it won't be confused with an EOF signal (-1), * won't be confused with an EOF signal (-1), and because the unicode value FFFE would be encoded as two chars
* and because the unicode value FFFE would be encoded as two chars (using surrogates) * (using surrogates) and thus there should never be a collision with a real text char.
* and thus there should never be a collision with a real text char.
*/ */
static final char DISABLED = '\ufffe'; static final char DISABLED = '\ufffe';
@ -92,15 +91,14 @@ public class CSVFormat implements Serializable {
.withLineSeparator(CRLF); .withLineSeparator(CRLF);
/** /**
* Excel file format (using a comma as the value delimiter). * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
* Note that the actual value delimiter used by Excel is locale dependent, * locale dependent, it might be necessary to customize this format to accommodate to your regional settings.
* it might be necessary to customize this format to accommodate to your
* regional settings.
* <p/> * <p/>
* For example for parsing or generating a CSV file on a French system * For example for parsing or generating a CSV file on a French system the following format will be used:
* the following format will be used: *
* * <pre>
* <pre>CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');</pre> * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
* </pre>
*/ */
public static final CSVFormat EXCEL = public static final CSVFormat EXCEL =
PRISTINE PRISTINE
@ -118,12 +116,12 @@ public class CSVFormat implements Serializable {
.withLineSeparator(CRLF); .withLineSeparator(CRLF);
/** /**
* Default MySQL format used by the <tt>SELECT INTO OUTFILE</tt> and * Default MySQL format used by the <tt>SELECT INTO OUTFILE</tt> and <tt>LOAD DATA INFILE</tt> operations. This is
* <tt>LOAD DATA INFILE</tt> operations. This is a tab-delimited * a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters
* format with a LF character as the line separator. Values are not quoted * are escaped with '\'.
* and special characters are escaped with '\'. *
* * @see <a
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a> * href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
*/ */
public static final CSVFormat MYSQL = public static final CSVFormat MYSQL =
PRISTINE PRISTINE
@ -134,25 +132,26 @@ public class CSVFormat implements Serializable {
/** /**
* Creates a customized CSV format. * Creates a customized CSV format.
* *
* @param delimiter the char used for value separation * @param delimiter
* @param encapsulator the char used as value encapsulation marker * the char used for value separation
* @param commentStart the char used for comment identification * @param encapsulator
* @param escape the char used to escape special characters in values * the char used as value encapsulation marker
* @param surroundingSpacesIgnored <tt>true</tt> when whitespaces enclosing values should be ignored * @param commentStart
* @param emptyLinesIgnored <tt>true</tt> when the parser should skip empty lines * @param commentStart
* @param lineSeparator the line separator to use for output * the char used for comment identification
* @param header the header * @param escape
* the char used to escape special characters in values
* @param surroundingSpacesIgnored
* <tt>true</tt> when whitespaces enclosing values should be ignored
* @param emptyLinesIgnored
* <tt>true</tt> when the parser should skip empty lines
* @param lineSeparator
* the line separator to use for output
* @param header
* the header
*/ */
CSVFormat( CSVFormat(char delimiter, char encapsulator, char commentStart, char escape, boolean surroundingSpacesIgnored,
char delimiter, boolean emptyLinesIgnored, String lineSeparator, String[] header) {
char encapsulator,
char commentStart,
char escape,
boolean surroundingSpacesIgnored,
boolean emptyLinesIgnored,
String lineSeparator,
String[] header) {
this.delimiter = delimiter; this.delimiter = delimiter;
this.encapsulator = encapsulator; this.encapsulator = encapsulator;
this.commentStart = commentStart; this.commentStart = commentStart;
@ -168,9 +167,10 @@ public class CSVFormat implements Serializable {
/** /**
* Returns true if the given character is a line break character. * Returns true if the given character is a line break character.
* *
* @param c the character to check * @param c
* * the character to check
*
* @return true if <code>c</code> is a line break character * @return true if <code>c</code> is a line break character
*/ */
private static boolean isLineBreak(char c) { private static boolean isLineBreak(char c) {
@ -182,29 +182,34 @@ public class CSVFormat implements Serializable {
*/ */
void validate() throws IllegalArgumentException { void validate() throws IllegalArgumentException {
if (delimiter == encapsulator) { if (delimiter == encapsulator) {
throw new IllegalArgumentException("The encapsulator character and the delimiter cannot be the same (\"" + encapsulator + "\")"); throw new IllegalArgumentException("The encapsulator character and the delimiter cannot be the same (\""
+ encapsulator + "\")");
} }
if (delimiter == escape) { if (delimiter == escape) {
throw new IllegalArgumentException("The escape character and the delimiter cannot be the same (\"" + escape + "\")"); throw new IllegalArgumentException("The escape character and the delimiter cannot be the same (\""
+ escape + "\")");
} }
if (delimiter == commentStart) { if (delimiter == commentStart) {
throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same (\"" + commentStart + "\")"); throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same (\""
+ commentStart + "\")");
} }
if (encapsulator != DISABLED && encapsulator == commentStart) { if (encapsulator != DISABLED && encapsulator == commentStart) {
throw new IllegalArgumentException("The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")"); throw new IllegalArgumentException(
"The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")");
} }
if (escape != DISABLED && escape == commentStart) { if (escape != DISABLED && escape == commentStart) {
throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\"" + commentStart + "\")"); throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\""
+ commentStart + "\")");
} }
} }
/** /**
* Returns the character delimiting the values (typically ';', ',' or '\t'). * Returns the character delimiting the values (typically ';', ',' or '\t').
* *
* @return the delimiter character * @return the delimiter character
*/ */
public char getDelimiter() { public char getDelimiter() {
@ -213,22 +218,25 @@ public class CSVFormat implements Serializable {
/** /**
* Returns a copy of this format using the specified delimiter character. * Returns a copy of this format using the specified delimiter character.
* *
* @param delimiter the delimiter character * @param delimiter
* the delimiter character
* @return A copy of this format using the specified delimiter character * @return A copy of this format using the specified delimiter character
* @throws IllegalArgumentException thrown if the specified character is a line break * @throws IllegalArgumentException
* thrown if the specified character is a line break
*/ */
public CSVFormat withDelimiter(char delimiter) { public CSVFormat withDelimiter(char delimiter) {
if (isLineBreak(delimiter)) { if (isLineBreak(delimiter)) {
throw new IllegalArgumentException("The delimiter cannot be a line break"); throw new IllegalArgumentException("The delimiter cannot be a line break");
} }
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
emptyLinesIgnored, lineSeparator, header);
} }
/** /**
* Returns the character used to encapsulate values containing special characters. * Returns the character used to encapsulate values containing special characters.
* *
* @return the encapsulator character * @return the encapsulator character
*/ */
public char getEncapsulator() { public char getEncapsulator() {
@ -237,22 +245,25 @@ public class CSVFormat implements Serializable {
/** /**
* Returns a copy of this format using the specified encapsulator character. * Returns a copy of this format using the specified encapsulator character.
* *
* @param encapsulator the encapsulator character * @param encapsulator
* the encapsulator character
* @return A copy of this format using the specified encapsulator character * @return A copy of this format using the specified encapsulator character
* @throws IllegalArgumentException thrown if the specified character is a line break * @throws IllegalArgumentException
* thrown if the specified character is a line break
*/ */
public CSVFormat withEncapsulator(char encapsulator) { public CSVFormat withEncapsulator(char encapsulator) {
if (isLineBreak(encapsulator)) { if (isLineBreak(encapsulator)) {
throw new IllegalArgumentException("The encapsulator cannot be a line break"); throw new IllegalArgumentException("The encapsulator cannot be a line break");
} }
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
emptyLinesIgnored, lineSeparator, header);
} }
/** /**
* Returns whether an encapsulator has been defined. * Returns whether an encapsulator has been defined.
* *
* @return {@code true} if an encapsulator is defined * @return {@code true} if an encapsulator is defined
*/ */
public boolean isEncapsulating() { public boolean isEncapsulating() {
@ -261,7 +272,7 @@ public class CSVFormat implements Serializable {
/** /**
* Returns the character marking the start of a line comment. * Returns the character marking the start of a line comment.
* *
* @return the comment start marker. * @return the comment start marker.
*/ */
public char getCommentStart() { public char getCommentStart() {
@ -270,28 +281,29 @@ public class CSVFormat implements Serializable {
/** /**
* Returns a copy of this format using the specified character as the comment start marker. * Returns a copy of this format using the specified character as the comment start marker.
* *
* Note that the comment introducer character is only recognised * Note that the comment introducer character is only recognised at the start of a line.
* at the start of a line. *
* * @param commentStart
* @param commentStart the comment start marker * the comment start marker
* @return A copy of this format using the specified character as the comment start marker * @return A copy of this format using the specified character as the comment start marker
* @throws IllegalArgumentException thrown if the specified character is a line break * @throws IllegalArgumentException
* thrown if the specified character is a line break
*/ */
public CSVFormat withCommentStart(char commentStart) { public CSVFormat withCommentStart(char commentStart) {
if (isLineBreak(commentStart)) { if (isLineBreak(commentStart)) {
throw new IllegalArgumentException("The comment start character cannot be a line break"); throw new IllegalArgumentException("The comment start character cannot be a line break");
} }
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
emptyLinesIgnored, lineSeparator, header);
} }
/** /**
* Specifies whether comments are supported by this format. * Specifies whether comments are supported by this format.
* *
* Note that the comment introducer character is only recognised * Note that the comment introducer character is only recognised at the start of a line.
* at the start of a line. *
*
* @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise * @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise
*/ */
public boolean isCommentingEnabled() { public boolean isCommentingEnabled() {
@ -300,7 +312,7 @@ public class CSVFormat implements Serializable {
/** /**
* Returns the escape character. * Returns the escape character.
* *
* @return the escape character * @return the escape character
*/ */
public char getEscape() { public char getEscape() {
@ -309,22 +321,25 @@ public class CSVFormat implements Serializable {
/** /**
* Returns a copy of this format using the specified escape character. * Returns a copy of this format using the specified escape character.
* *
* @param escape the escape character * @param escape
* the escape character
* @return A copy of this format using the specified escape character * @return A copy of this format using the specified escape character
* @throws IllegalArgumentException thrown if the specified character is a line break * @throws IllegalArgumentException
* thrown if the specified character is a line break
*/ */
public CSVFormat withEscape(char escape) { public CSVFormat withEscape(char escape) {
if (isLineBreak(escape)) { if (isLineBreak(escape)) {
throw new IllegalArgumentException("The escape character cannot be a line break"); throw new IllegalArgumentException("The escape character cannot be a line break");
} }
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
emptyLinesIgnored, lineSeparator, header);
} }
/** /**
* Returns whether escapes are being processed. * Returns whether escapes are being processed.
* *
* @return {@code true} if escapes are processed * @return {@code true} if escapes are processed
*/ */
public boolean isEscaping() { public boolean isEscaping() {
@ -333,8 +348,9 @@ public class CSVFormat implements Serializable {
/** /**
* Specifies whether spaces around values are ignored when parsing input. * Specifies whether spaces around values are ignored when parsing input.
* *
* @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the value. * @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the
* value.
*/ */
public boolean isSurroundingSpacesIgnored() { public boolean isSurroundingSpacesIgnored() {
return surroundingSpacesIgnored; return surroundingSpacesIgnored;
@ -342,19 +358,22 @@ public class CSVFormat implements Serializable {
/** /**
* Returns a copy of this format with the specified trimming behavior. * Returns a copy of this format with the specified trimming behavior.
* *
* @param surroundingSpacesIgnored the trimming behavior, <tt>true</tt> to remove the surrounding spaces, * @param surroundingSpacesIgnored
* <tt>false</tt> to leave the spaces as is. * the trimming behavior, <tt>true</tt> to remove the surrounding spaces, <tt>false</tt> to leave the
* spaces as is.
* @return A copy of this format with the specified trimming behavior. * @return A copy of this format with the specified trimming behavior.
*/ */
public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) { public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) {
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
emptyLinesIgnored, lineSeparator, header);
} }
/** /**
* Specifies whether empty lines between records are ignored when parsing input. * Specifies whether empty lines between records are ignored when parsing input.
* *
* @return <tt>true</tt> if empty lines between records are ignored, <tt>false</tt> if they are turned into empty records. * @return <tt>true</tt> if empty lines between records are ignored, <tt>false</tt> if they are turned into empty
* records.
*/ */
public boolean isEmptyLinesIgnored() { public boolean isEmptyLinesIgnored() {
return emptyLinesIgnored; return emptyLinesIgnored;
@ -362,18 +381,20 @@ public class CSVFormat implements Serializable {
/** /**
* Returns a copy of this format with the specified empty line skipping behavior. * Returns a copy of this format with the specified empty line skipping behavior.
* *
* @param emptyLinesIgnored the empty line skipping behavior, <tt>true</tt> to ignore the empty lines * @param emptyLinesIgnored
* between the records, <tt>false</tt> to translate empty lines to empty records. * the empty line skipping behavior, <tt>true</tt> to ignore the empty lines between the records,
* @return A copy of this format with the specified empty line skipping behavior. * <tt>false</tt> to translate empty lines to empty records.
* @return A copy of this format with the specified empty line skipping behavior.
*/ */
public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) { public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) {
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
emptyLinesIgnored, lineSeparator, header);
} }
/** /**
* Returns the line separator delimiting output records. * Returns the line separator delimiting output records.
* *
* @return the line separator * @return the line separator
*/ */
public String getLineSeparator() { public String getLineSeparator() {
@ -382,13 +403,15 @@ public class CSVFormat implements Serializable {
/** /**
* Returns a copy of this format using the specified output line separator. * Returns a copy of this format using the specified output line separator.
* *
* @param lineSeparator the line separator to be used for output. * @param lineSeparator
* * the line separator to be used for output.
*
* @return A copy of this format using the specified output line separator * @return A copy of this format using the specified output line separator
*/ */
public CSVFormat withLineSeparator(String lineSeparator) { public CSVFormat withLineSeparator(String lineSeparator) {
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
emptyLinesIgnored, lineSeparator, header);
} }
String[] getHeader() { String[] getHeader() {
@ -396,27 +419,34 @@ public class CSVFormat implements Serializable {
} }
/** /**
* Returns a copy of this format using the specified header. The header can * Returns a copy of this format using the specified header. The header can either be parsed automatically from the
* either be parsed automatically from the input file with: * input file with:
* *
* <pre>CSVFormat format = aformat.withHeader();</pre> * <pre>
* * CSVFormat format = aformat.withHeader();
* </pre>
*
* or specified manually with: * or specified manually with:
* *
* <pre>CSVFormat format = aformat.withHeader("name", "email", "phone");</pre> * <pre>
* * CSVFormat format = aformat.withHeader(&quot;name&quot;, &quot;email&quot;, &quot;phone&quot;);
* @param header the header, <tt>null</tt> if disabled, empty if parsed automatically, user specified otherwise. * </pre>
* *
* @param header
* the header, <tt>null</tt> if disabled, empty if parsed automatically, user specified otherwise.
*
* @return A copy of this format using the specified header * @return A copy of this format using the specified header
*/ */
public CSVFormat withHeader(String... header) { public CSVFormat withHeader(String... header) {
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header); return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
emptyLinesIgnored, lineSeparator, header);
} }
/** /**
* Parses the specified content. * Parses the specified content.
* *
* @param in the input stream * @param in
* the input stream
*/ */
public Iterable<CSVRecord> parse(Reader in) throws IOException { public Iterable<CSVRecord> parse(Reader in) throws IOException {
return new CSVParser(in, this); return new CSVParser(in, this);
@ -424,8 +454,9 @@ public class CSVFormat implements Serializable {
/** /**
* Format the specified values. * Format the specified values.
* *
* @param values the values to format * @param values
* the values to format
*/ */
public String format(String... values) { public String format(String... values) {
StringWriter out = new StringWriter(); StringWriter out = new StringWriter();

View File

@ -32,10 +32,12 @@ class CSVLexer extends Lexer {
* Returns the next token. * Returns the next token.
* <p/> * <p/>
* A token corresponds to a term, a record change or an end-of-file indicator. * A token corresponds to a term, a record change or an end-of-file indicator.
* *
* @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token. * @param tkn
* an existing Token object to reuse. The caller is responsible to initialize the Token.
* @return the next token found * @return the next token found
* @throws java.io.IOException on stream access error * @throws java.io.IOException
* on stream access error
*/ */
@Override @Override
Token nextToken(Token tkn) throws IOException { Token nextToken(Token tkn) throws IOException {
@ -43,17 +45,15 @@ class CSVLexer extends Lexer {
// get the last read char (required for empty line detection) // get the last read char (required for empty line detection)
int lastChar = in.readAgain(); int lastChar = in.readAgain();
// read the next char and set eol // read the next char and set eol
int c = in.read(); int c = in.read();
/* /*
* Note: * Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF
* The following call will swallow LF if c == CR. * - they are equivalent here.
* But we don't need to know if the last char
* was CR or LF - they are equivalent here.
*/ */
boolean eol = isEndOfLine(c); boolean eol = isEndOfLine(c);
// empty line detection: eol AND (last char was EOL or beginning) // empty line detection: eol AND (last char was EOL or beginning)
if (emptyLinesIgnored) { if (emptyLinesIgnored) {
while (eol && isStartOfLine(lastChar)) { while (eol && isStartOfLine(lastChar)) {
// go one char ahead ... // go one char ahead ...
@ -83,7 +83,7 @@ class CSVLexer extends Lexer {
return tkn; return tkn;
} }
// important: make sure a new char gets consumed in each iteration // important: make sure a new char gets consumed in each iteration
while (tkn.type == INVALID) { while (tkn.type == INVALID) {
// ignore whitespaces at beginning of a token // ignore whitespaces at beginning of a token
if (surroundingSpacesIgnored) { if (surroundingSpacesIgnored) {
@ -99,14 +99,14 @@ class CSVLexer extends Lexer {
tkn.type = TOKEN; tkn.type = TOKEN;
} else if (eol) { } else if (eol) {
// empty token return EORECORD("") // empty token return EORECORD("")
//noop: tkn.content.append(""); // noop: tkn.content.append("");
tkn.type = EORECORD; tkn.type = EORECORD;
} else if (isEncapsulator(c)) { } else if (isEncapsulator(c)) {
// consume encapsulated token // consume encapsulated token
encapsulatedTokenLexer(tkn); encapsulatedTokenLexer(tkn);
} else if (isEndOfFile(c)) { } else if (isEndOfFile(c)) {
// end of file return EOF() // end of file return EOF()
//noop: tkn.content.append(""); // noop: tkn.content.append("");
tkn.type = EOF; tkn.type = EOF;
tkn.isReady = true; // there is data at EOF tkn.isReady = true; // there is data at EOF
} else { } else {
@ -121,19 +121,21 @@ class CSVLexer extends Lexer {
/** /**
* A simple token lexer * A simple token lexer
* <p/> * <p/>
* Simple tokens are tokens which are not surrounded by encapsulators. * Simple tokens are tokens which are not surrounded by encapsulators. A simple token might contain escaped
* A simple token might contain escaped delimiters (as \, or \;). The * delimiters (as \, or \;). The token is finished when one of the following conditions become true:
* token is finished when one of the following conditions become true:
* <ul> * <ul>
* <li>end of line has been reached (EORECORD)</li> * <li>end of line has been reached (EORECORD)</li>
* <li>end of stream has been reached (EOF)</li> * <li>end of stream has been reached (EOF)</li>
* <li>an unescaped delimiter has been reached (TOKEN)</li> * <li>an unescaped delimiter has been reached (TOKEN)</li>
* </ul> * </ul>
* *
* @param tkn the current token * @param tkn
* @param c the current character * the current token
* @param c
* the current character
* @return the filled token * @return the filled token
* @throws IOException on stream access error * @throws IOException
* on stream access error
*/ */
private Token simpleTokenLexer(Token tkn, int c) throws IOException { private Token simpleTokenLexer(Token tkn, int c) throws IOException {
// Faster to use while(true)+break than while(tkn.type == INVALID) // Faster to use while(true)+break than while(tkn.type == INVALID)
@ -167,24 +169,22 @@ class CSVLexer extends Lexer {
/** /**
* An encapsulated token lexer * An encapsulated token lexer
* <p/> * <p/>
* Encapsulated tokens are surrounded by the given encapsulating-string. * Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included
* The encapsulator itself might be included in the token using a * in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
* doubling syntax (as "", '') or using escaping (as in \", \'). * an encapsulated token are ignored. The token is finished when one of the following conditions become true:
* Whitespaces before and after an encapsulated token are ignored.
* The token is finished when one of the following conditions become true:
* <ul> * <ul>
* <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:</li> * <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:</li>
* <ul> * <ul>
* <li>delimiter (TOKEN)</li> * <li>delimiter (TOKEN)</li>
* <li>end of line (EORECORD)</li> * <li>end of line (EORECORD)</li>
* </ul>
* <li>end of stream has been reached (EOF)</li>
* </ul> * </ul>
* * <li>end of stream has been reached (EOF)</li> </ul>
* @param tkn the current token *
* @param tkn
* the current token
* @return a valid token object * @return a valid token object
* @throws IOException on invalid state: * @throws IOException
* EOF before closing encapsulator or invalid character before delimiter or EOL * on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL
*/ */
private Token encapsulatedTokenLexer(Token tkn) throws IOException { private Token encapsulatedTokenLexer(Token tkn) throws IOException {
// save current line number in case needed for IOE // save current line number in case needed for IOE
@ -216,13 +216,15 @@ class CSVLexer extends Lexer {
return tkn; return tkn;
} else if (!isWhitespace(c)) { } else if (!isWhitespace(c)) {
// error invalid char between token and next delimiter // error invalid char between token and next delimiter
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter"); throw new IOException("(line " + getLineNumber()
+ ") invalid char between encapsulated token and delimiter");
} }
} }
} }
} else if (isEndOfFile(c)) { } else if (isEndOfFile(c)) {
// error condition (end of file before end of token) // error condition (end of file before end of token)
throw new IOException("(startline " + startLineNumber + ") EOF reached before encapsulated token finished"); throw new IOException("(startline " + startLineNumber
+ ") EOF reached before encapsulated token finished");
} else { } else {
// consume character // consume character
tkn.content.append((char) c); tkn.content.append((char) c);