Fix Checkstyle: Format for 120 line length.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1383582 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a5d3d4ed7c
commit
93fc1f9363
|
@ -24,7 +24,7 @@ import java.io.StringWriter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The format specification of a CSV file.
|
* The format specification of a CSV file.
|
||||||
*
|
*
|
||||||
* This class is immutable.
|
* This class is immutable.
|
||||||
*/
|
*/
|
||||||
public class CSVFormat implements Serializable {
|
public class CSVFormat implements Serializable {
|
||||||
|
@ -48,10 +48,9 @@ public class CSVFormat implements Serializable {
|
||||||
private final boolean isEncapsulating;
|
private final boolean isEncapsulating;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constant char to be used for disabling comments, escapes and encapsulation.
|
* Constant char to be used for disabling comments, escapes and encapsulation. The value -2 is used because it
|
||||||
* The value -2 is used because it won't be confused with an EOF signal (-1),
|
* won't be confused with an EOF signal (-1), and because the unicode value FFFE would be encoded as two chars
|
||||||
* and because the unicode value FFFE would be encoded as two chars (using surrogates)
|
* (using surrogates) and thus there should never be a collision with a real text char.
|
||||||
* and thus there should never be a collision with a real text char.
|
|
||||||
*/
|
*/
|
||||||
static final char DISABLED = '\ufffe';
|
static final char DISABLED = '\ufffe';
|
||||||
|
|
||||||
|
@ -92,15 +91,14 @@ public class CSVFormat implements Serializable {
|
||||||
.withLineSeparator(CRLF);
|
.withLineSeparator(CRLF);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Excel file format (using a comma as the value delimiter).
|
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
|
||||||
* Note that the actual value delimiter used by Excel is locale dependent,
|
* locale dependent, it might be necessary to customize this format to accommodate your regional settings.
|
||||||
* it might be necessary to customize this format to accommodate your
|
|
||||||
* regional settings.
|
|
||||||
* <p/>
|
* <p/>
|
||||||
* For example for parsing or generating a CSV file on a French system
|
* For example for parsing or generating a CSV file on a French system the following format will be used:
|
||||||
* the following format will be used:
|
*
|
||||||
*
|
* <pre>
|
||||||
* <pre>CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');</pre>
|
* CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
|
||||||
|
* </pre>
|
||||||
*/
|
*/
|
||||||
public static final CSVFormat EXCEL =
|
public static final CSVFormat EXCEL =
|
||||||
PRISTINE
|
PRISTINE
|
||||||
|
@ -118,12 +116,12 @@ public class CSVFormat implements Serializable {
|
||||||
.withLineSeparator(CRLF);
|
.withLineSeparator(CRLF);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default MySQL format used by the <tt>SELECT INTO OUTFILE</tt> and
|
* Default MySQL format used by the <tt>SELECT INTO OUTFILE</tt> and <tt>LOAD DATA INFILE</tt> operations. This is
|
||||||
* <tt>LOAD DATA INFILE</tt> operations. This is a tab-delimited
|
* a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters
|
||||||
* format with a LF character as the line separator. Values are not quoted
|
* are escaped with '\'.
|
||||||
* and special characters are escaped with '\'.
|
*
|
||||||
*
|
* @see <a
|
||||||
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
|
* href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
|
||||||
*/
|
*/
|
||||||
public static final CSVFormat MYSQL =
|
public static final CSVFormat MYSQL =
|
||||||
PRISTINE
|
PRISTINE
|
||||||
|
@ -134,25 +132,26 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a customized CSV format.
|
* Creates a customized CSV format.
|
||||||
*
|
*
|
||||||
* @param delimiter the char used for value separation
|
* @param delimiter
|
||||||
* @param encapsulator the char used as value encapsulation marker
|
* the char used for value separation
|
||||||
* @param commentStart the char used for comment identification
|
* @param encapsulator
|
||||||
* @param escape the char used to escape special characters in values
|
* the char used as value encapsulation marker
|
||||||
* @param surroundingSpacesIgnored <tt>true</tt> when whitespaces enclosing values should be ignored
|
* @param commentStart
|
||||||
* @param emptyLinesIgnored <tt>true</tt> when the parser should skip empty lines
|
* the char used for comment identification
|
||||||
* @param lineSeparator the line separator to use for output
|
* @param escape
|
||||||
* @param header the header
|
* the char used to escape special characters in values
|
||||||
|
* @param surroundingSpacesIgnored
|
||||||
|
* <tt>true</tt> when whitespaces enclosing values should be ignored
|
||||||
|
* @param emptyLinesIgnored
|
||||||
|
* <tt>true</tt> when the parser should skip empty lines
|
||||||
|
* @param lineSeparator
|
||||||
|
* the line separator to use for output
|
||||||
|
* @param header
|
||||||
|
* the header
|
||||||
*/
|
*/
|
||||||
CSVFormat(
|
CSVFormat(char delimiter, char encapsulator, char commentStart, char escape, boolean surroundingSpacesIgnored,
|
||||||
char delimiter,
|
boolean emptyLinesIgnored, String lineSeparator, String[] header) {
|
||||||
char encapsulator,
|
|
||||||
char commentStart,
|
|
||||||
char escape,
|
|
||||||
boolean surroundingSpacesIgnored,
|
|
||||||
boolean emptyLinesIgnored,
|
|
||||||
String lineSeparator,
|
|
||||||
String[] header) {
|
|
||||||
this.delimiter = delimiter;
|
this.delimiter = delimiter;
|
||||||
this.encapsulator = encapsulator;
|
this.encapsulator = encapsulator;
|
||||||
this.commentStart = commentStart;
|
this.commentStart = commentStart;
|
||||||
|
@ -168,9 +167,10 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the given character is a line break character.
|
* Returns true if the given character is a line break character.
|
||||||
*
|
*
|
||||||
* @param c the character to check
|
* @param c
|
||||||
*
|
* the character to check
|
||||||
|
*
|
||||||
* @return true if <code>c</code> is a line break character
|
* @return true if <code>c</code> is a line break character
|
||||||
*/
|
*/
|
||||||
private static boolean isLineBreak(char c) {
|
private static boolean isLineBreak(char c) {
|
||||||
|
@ -182,29 +182,34 @@ public class CSVFormat implements Serializable {
|
||||||
*/
|
*/
|
||||||
void validate() throws IllegalArgumentException {
|
void validate() throws IllegalArgumentException {
|
||||||
if (delimiter == encapsulator) {
|
if (delimiter == encapsulator) {
|
||||||
throw new IllegalArgumentException("The encapsulator character and the delimiter cannot be the same (\"" + encapsulator + "\")");
|
throw new IllegalArgumentException("The encapsulator character and the delimiter cannot be the same (\""
|
||||||
|
+ encapsulator + "\")");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (delimiter == escape) {
|
if (delimiter == escape) {
|
||||||
throw new IllegalArgumentException("The escape character and the delimiter cannot be the same (\"" + escape + "\")");
|
throw new IllegalArgumentException("The escape character and the delimiter cannot be the same (\""
|
||||||
|
+ escape + "\")");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (delimiter == commentStart) {
|
if (delimiter == commentStart) {
|
||||||
throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same (\"" + commentStart + "\")");
|
throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same (\""
|
||||||
|
+ commentStart + "\")");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (encapsulator != DISABLED && encapsulator == commentStart) {
|
if (encapsulator != DISABLED && encapsulator == commentStart) {
|
||||||
throw new IllegalArgumentException("The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")");
|
throw new IllegalArgumentException(
|
||||||
|
"The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (escape != DISABLED && escape == commentStart) {
|
if (escape != DISABLED && escape == commentStart) {
|
||||||
throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\"" + commentStart + "\")");
|
throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\""
|
||||||
|
+ commentStart + "\")");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the character delimiting the values (typically ';', ',' or '\t').
|
* Returns the character delimiting the values (typically ';', ',' or '\t').
|
||||||
*
|
*
|
||||||
* @return the delimiter character
|
* @return the delimiter character
|
||||||
*/
|
*/
|
||||||
public char getDelimiter() {
|
public char getDelimiter() {
|
||||||
|
@ -213,22 +218,25 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified delimiter character.
|
* Returns a copy of this format using the specified delimiter character.
|
||||||
*
|
*
|
||||||
* @param delimiter the delimiter character
|
* @param delimiter
|
||||||
|
* the delimiter character
|
||||||
* @return A copy of this format using the specified delimiter character
|
* @return A copy of this format using the specified delimiter character
|
||||||
* @throws IllegalArgumentException thrown if the specified character is a line break
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the specified character is a line break
|
||||||
*/
|
*/
|
||||||
public CSVFormat withDelimiter(char delimiter) {
|
public CSVFormat withDelimiter(char delimiter) {
|
||||||
if (isLineBreak(delimiter)) {
|
if (isLineBreak(delimiter)) {
|
||||||
throw new IllegalArgumentException("The delimiter cannot be a line break");
|
throw new IllegalArgumentException("The delimiter cannot be a line break");
|
||||||
}
|
}
|
||||||
|
|
||||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
|
||||||
|
emptyLinesIgnored, lineSeparator, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the character used to encapsulate values containing special characters.
|
* Returns the character used to encapsulate values containing special characters.
|
||||||
*
|
*
|
||||||
* @return the encapsulator character
|
* @return the encapsulator character
|
||||||
*/
|
*/
|
||||||
public char getEncapsulator() {
|
public char getEncapsulator() {
|
||||||
|
@ -237,22 +245,25 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified encapsulator character.
|
* Returns a copy of this format using the specified encapsulator character.
|
||||||
*
|
*
|
||||||
* @param encapsulator the encapsulator character
|
* @param encapsulator
|
||||||
|
* the encapsulator character
|
||||||
* @return A copy of this format using the specified encapsulator character
|
* @return A copy of this format using the specified encapsulator character
|
||||||
* @throws IllegalArgumentException thrown if the specified character is a line break
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the specified character is a line break
|
||||||
*/
|
*/
|
||||||
public CSVFormat withEncapsulator(char encapsulator) {
|
public CSVFormat withEncapsulator(char encapsulator) {
|
||||||
if (isLineBreak(encapsulator)) {
|
if (isLineBreak(encapsulator)) {
|
||||||
throw new IllegalArgumentException("The encapsulator cannot be a line break");
|
throw new IllegalArgumentException("The encapsulator cannot be a line break");
|
||||||
}
|
}
|
||||||
|
|
||||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
|
||||||
|
emptyLinesIgnored, lineSeparator, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns whether an encapsulator has been defined.
|
* Returns whether an encapsulator has been defined.
|
||||||
*
|
*
|
||||||
* @return {@code true} if an encapsulator is defined
|
* @return {@code true} if an encapsulator is defined
|
||||||
*/
|
*/
|
||||||
public boolean isEncapsulating() {
|
public boolean isEncapsulating() {
|
||||||
|
@ -261,7 +272,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the character marking the start of a line comment.
|
* Returns the character marking the start of a line comment.
|
||||||
*
|
*
|
||||||
* @return the comment start marker.
|
* @return the comment start marker.
|
||||||
*/
|
*/
|
||||||
public char getCommentStart() {
|
public char getCommentStart() {
|
||||||
|
@ -270,28 +281,29 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified character as the comment start marker.
|
* Returns a copy of this format using the specified character as the comment start marker.
|
||||||
*
|
*
|
||||||
* Note that the comment introducer character is only recognised
|
* Note that the comment introducer character is only recognised at the start of a line.
|
||||||
* at the start of a line.
|
*
|
||||||
*
|
* @param commentStart
|
||||||
* @param commentStart the comment start marker
|
* the comment start marker
|
||||||
* @return A copy of this format using the specified character as the comment start marker
|
* @return A copy of this format using the specified character as the comment start marker
|
||||||
* @throws IllegalArgumentException thrown if the specified character is a line break
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the specified character is a line break
|
||||||
*/
|
*/
|
||||||
public CSVFormat withCommentStart(char commentStart) {
|
public CSVFormat withCommentStart(char commentStart) {
|
||||||
if (isLineBreak(commentStart)) {
|
if (isLineBreak(commentStart)) {
|
||||||
throw new IllegalArgumentException("The comment start character cannot be a line break");
|
throw new IllegalArgumentException("The comment start character cannot be a line break");
|
||||||
}
|
}
|
||||||
|
|
||||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
|
||||||
|
emptyLinesIgnored, lineSeparator, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Specifies whether comments are supported by this format.
|
* Specifies whether comments are supported by this format.
|
||||||
*
|
*
|
||||||
* Note that the comment introducer character is only recognised
|
* Note that the comment introducer character is only recognised at the start of a line.
|
||||||
* at the start of a line.
|
*
|
||||||
*
|
|
||||||
* @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise
|
* @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise
|
||||||
*/
|
*/
|
||||||
public boolean isCommentingEnabled() {
|
public boolean isCommentingEnabled() {
|
||||||
|
@ -300,7 +312,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the escape character.
|
* Returns the escape character.
|
||||||
*
|
*
|
||||||
* @return the escape character
|
* @return the escape character
|
||||||
*/
|
*/
|
||||||
public char getEscape() {
|
public char getEscape() {
|
||||||
|
@ -309,22 +321,25 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified escape character.
|
* Returns a copy of this format using the specified escape character.
|
||||||
*
|
*
|
||||||
* @param escape the escape character
|
* @param escape
|
||||||
|
* the escape character
|
||||||
* @return A copy of this format using the specified escape character
|
* @return A copy of this format using the specified escape character
|
||||||
* @throws IllegalArgumentException thrown if the specified character is a line break
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the specified character is a line break
|
||||||
*/
|
*/
|
||||||
public CSVFormat withEscape(char escape) {
|
public CSVFormat withEscape(char escape) {
|
||||||
if (isLineBreak(escape)) {
|
if (isLineBreak(escape)) {
|
||||||
throw new IllegalArgumentException("The escape character cannot be a line break");
|
throw new IllegalArgumentException("The escape character cannot be a line break");
|
||||||
}
|
}
|
||||||
|
|
||||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
|
||||||
|
emptyLinesIgnored, lineSeparator, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns whether escapes are being processed.
|
* Returns whether escapes are being processed.
|
||||||
*
|
*
|
||||||
* @return {@code true} if escapes are processed
|
* @return {@code true} if escapes are processed
|
||||||
*/
|
*/
|
||||||
public boolean isEscaping() {
|
public boolean isEscaping() {
|
||||||
|
@ -333,8 +348,9 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Specifies whether spaces around values are ignored when parsing input.
|
* Specifies whether spaces around values are ignored when parsing input.
|
||||||
*
|
*
|
||||||
* @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the value.
|
* @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the
|
||||||
|
* value.
|
||||||
*/
|
*/
|
||||||
public boolean isSurroundingSpacesIgnored() {
|
public boolean isSurroundingSpacesIgnored() {
|
||||||
return surroundingSpacesIgnored;
|
return surroundingSpacesIgnored;
|
||||||
|
@ -342,19 +358,22 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format with the specified trimming behavior.
|
* Returns a copy of this format with the specified trimming behavior.
|
||||||
*
|
*
|
||||||
* @param surroundingSpacesIgnored the trimming behavior, <tt>true</tt> to remove the surrounding spaces,
|
* @param surroundingSpacesIgnored
|
||||||
* <tt>false</tt> to leave the spaces as is.
|
* the trimming behavior, <tt>true</tt> to remove the surrounding spaces, <tt>false</tt> to leave the
|
||||||
|
* spaces as is.
|
||||||
* @return A copy of this format with the specified trimming behavior.
|
* @return A copy of this format with the specified trimming behavior.
|
||||||
*/
|
*/
|
||||||
public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) {
|
public CSVFormat withSurroundingSpacesIgnored(boolean surroundingSpacesIgnored) {
|
||||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
|
||||||
|
emptyLinesIgnored, lineSeparator, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Specifies whether empty lines between records are ignored when parsing input.
|
* Specifies whether empty lines between records are ignored when parsing input.
|
||||||
*
|
*
|
||||||
* @return <tt>true</tt> if empty lines between records are ignored, <tt>false</tt> if they are turned into empty records.
|
* @return <tt>true</tt> if empty lines between records are ignored, <tt>false</tt> if they are turned into empty
|
||||||
|
* records.
|
||||||
*/
|
*/
|
||||||
public boolean isEmptyLinesIgnored() {
|
public boolean isEmptyLinesIgnored() {
|
||||||
return emptyLinesIgnored;
|
return emptyLinesIgnored;
|
||||||
|
@ -362,18 +381,20 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format with the specified empty line skipping behavior.
|
* Returns a copy of this format with the specified empty line skipping behavior.
|
||||||
*
|
*
|
||||||
* @param emptyLinesIgnored the empty line skipping behavior, <tt>true</tt> to ignore the empty lines
|
* @param emptyLinesIgnored
|
||||||
* between the records, <tt>false</tt> to translate empty lines to empty records.
|
* the empty line skipping behavior, <tt>true</tt> to ignore the empty lines between the records,
|
||||||
* @return A copy of this format with the specified empty line skipping behavior.
|
* <tt>false</tt> to translate empty lines to empty records.
|
||||||
|
* @return A copy of this format with the specified empty line skipping behavior.
|
||||||
*/
|
*/
|
||||||
public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) {
|
public CSVFormat withEmptyLinesIgnored(boolean emptyLinesIgnored) {
|
||||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
|
||||||
|
emptyLinesIgnored, lineSeparator, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the line separator delimiting output records.
|
* Returns the line separator delimiting output records.
|
||||||
*
|
*
|
||||||
* @return the line separator
|
* @return the line separator
|
||||||
*/
|
*/
|
||||||
public String getLineSeparator() {
|
public String getLineSeparator() {
|
||||||
|
@ -382,13 +403,15 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified output line separator.
|
* Returns a copy of this format using the specified output line separator.
|
||||||
*
|
*
|
||||||
* @param lineSeparator the line separator to be used for output.
|
* @param lineSeparator
|
||||||
*
|
* the line separator to be used for output.
|
||||||
|
*
|
||||||
* @return A copy of this format using the specified output line separator
|
* @return A copy of this format using the specified output line separator
|
||||||
*/
|
*/
|
||||||
public CSVFormat withLineSeparator(String lineSeparator) {
|
public CSVFormat withLineSeparator(String lineSeparator) {
|
||||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
|
||||||
|
emptyLinesIgnored, lineSeparator, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] getHeader() {
|
String[] getHeader() {
|
||||||
|
@ -396,27 +419,34 @@ public class CSVFormat implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified header. The header can
|
* Returns a copy of this format using the specified header. The header can either be parsed automatically from the
|
||||||
* either be parsed automatically from the input file with:
|
* input file with:
|
||||||
*
|
*
|
||||||
* <pre>CSVFormat format = aformat.withHeader();</pre>
|
* <pre>
|
||||||
*
|
* CSVFormat format = aformat.withHeader();
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
* or specified manually with:
|
* or specified manually with:
|
||||||
*
|
*
|
||||||
* <pre>CSVFormat format = aformat.withHeader("name", "email", "phone");</pre>
|
* <pre>
|
||||||
*
|
* CSVFormat format = aformat.withHeader("name", "email", "phone");
|
||||||
* @param header the header, <tt>null</tt> if disabled, empty if parsed automatically, user specified otherwise.
|
* </pre>
|
||||||
*
|
*
|
||||||
|
* @param header
|
||||||
|
* the header, <tt>null</tt> if disabled, empty if parsed automatically, user specified otherwise.
|
||||||
|
*
|
||||||
* @return A copy of this format using the specified header
|
* @return A copy of this format using the specified header
|
||||||
*/
|
*/
|
||||||
public CSVFormat withHeader(String... header) {
|
public CSVFormat withHeader(String... header) {
|
||||||
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored, emptyLinesIgnored, lineSeparator, header);
|
return new CSVFormat(delimiter, encapsulator, commentStart, escape, surroundingSpacesIgnored,
|
||||||
|
emptyLinesIgnored, lineSeparator, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses the specified content.
|
* Parses the specified content.
|
||||||
*
|
*
|
||||||
* @param in the input stream
|
* @param in
|
||||||
|
* the input stream
|
||||||
*/
|
*/
|
||||||
public Iterable<CSVRecord> parse(Reader in) throws IOException {
|
public Iterable<CSVRecord> parse(Reader in) throws IOException {
|
||||||
return new CSVParser(in, this);
|
return new CSVParser(in, this);
|
||||||
|
@ -424,8 +454,9 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Format the specified values.
|
* Format the specified values.
|
||||||
*
|
*
|
||||||
* @param values the values to format
|
* @param values
|
||||||
|
* the values to format
|
||||||
*/
|
*/
|
||||||
public String format(String... values) {
|
public String format(String... values) {
|
||||||
StringWriter out = new StringWriter();
|
StringWriter out = new StringWriter();
|
||||||
|
|
|
@ -32,10 +32,12 @@ class CSVLexer extends Lexer {
|
||||||
* Returns the next token.
|
* Returns the next token.
|
||||||
* <p/>
|
* <p/>
|
||||||
* A token corresponds to a term, a record change or an end-of-file indicator.
|
* A token corresponds to a term, a record change or an end-of-file indicator.
|
||||||
*
|
*
|
||||||
* @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token.
|
* @param tkn
|
||||||
|
* an existing Token object to reuse. The caller is responsible to initialize the Token.
|
||||||
* @return the next token found
|
* @return the next token found
|
||||||
* @throws java.io.IOException on stream access error
|
* @throws java.io.IOException
|
||||||
|
* on stream access error
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
Token nextToken(Token tkn) throws IOException {
|
Token nextToken(Token tkn) throws IOException {
|
||||||
|
@ -43,17 +45,15 @@ class CSVLexer extends Lexer {
|
||||||
// get the last read char (required for empty line detection)
|
// get the last read char (required for empty line detection)
|
||||||
int lastChar = in.readAgain();
|
int lastChar = in.readAgain();
|
||||||
|
|
||||||
// read the next char and set eol
|
// read the next char and set eol
|
||||||
int c = in.read();
|
int c = in.read();
|
||||||
/*
|
/*
|
||||||
* Note:
|
* Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF
|
||||||
* The following call will swallow LF if c == CR.
|
* - they are equivalent here.
|
||||||
* But we don't need to know if the last char
|
|
||||||
* was CR or LF - they are equivalent here.
|
|
||||||
*/
|
*/
|
||||||
boolean eol = isEndOfLine(c);
|
boolean eol = isEndOfLine(c);
|
||||||
|
|
||||||
// empty line detection: eol AND (last char was EOL or beginning)
|
// empty line detection: eol AND (last char was EOL or beginning)
|
||||||
if (emptyLinesIgnored) {
|
if (emptyLinesIgnored) {
|
||||||
while (eol && isStartOfLine(lastChar)) {
|
while (eol && isStartOfLine(lastChar)) {
|
||||||
// go one char ahead ...
|
// go one char ahead ...
|
||||||
|
@ -83,7 +83,7 @@ class CSVLexer extends Lexer {
|
||||||
return tkn;
|
return tkn;
|
||||||
}
|
}
|
||||||
|
|
||||||
// important: make sure a new char gets consumed in each iteration
|
// important: make sure a new char gets consumed in each iteration
|
||||||
while (tkn.type == INVALID) {
|
while (tkn.type == INVALID) {
|
||||||
// ignore whitespaces at beginning of a token
|
// ignore whitespaces at beginning of a token
|
||||||
if (surroundingSpacesIgnored) {
|
if (surroundingSpacesIgnored) {
|
||||||
|
@ -99,14 +99,14 @@ class CSVLexer extends Lexer {
|
||||||
tkn.type = TOKEN;
|
tkn.type = TOKEN;
|
||||||
} else if (eol) {
|
} else if (eol) {
|
||||||
// empty token return EORECORD("")
|
// empty token return EORECORD("")
|
||||||
//noop: tkn.content.append("");
|
// noop: tkn.content.append("");
|
||||||
tkn.type = EORECORD;
|
tkn.type = EORECORD;
|
||||||
} else if (isEncapsulator(c)) {
|
} else if (isEncapsulator(c)) {
|
||||||
// consume encapsulated token
|
// consume encapsulated token
|
||||||
encapsulatedTokenLexer(tkn);
|
encapsulatedTokenLexer(tkn);
|
||||||
} else if (isEndOfFile(c)) {
|
} else if (isEndOfFile(c)) {
|
||||||
// end of file return EOF()
|
// end of file return EOF()
|
||||||
//noop: tkn.content.append("");
|
// noop: tkn.content.append("");
|
||||||
tkn.type = EOF;
|
tkn.type = EOF;
|
||||||
tkn.isReady = true; // there is data at EOF
|
tkn.isReady = true; // there is data at EOF
|
||||||
} else {
|
} else {
|
||||||
|
@ -121,19 +121,21 @@ class CSVLexer extends Lexer {
|
||||||
/**
|
/**
|
||||||
* A simple token lexer
|
* A simple token lexer
|
||||||
* <p/>
|
* <p/>
|
||||||
* Simple tokens are tokens which are not surrounded by encapsulators.
|
* Simple tokens are tokens which are not surrounded by encapsulators. A simple token might contain escaped
|
||||||
* A simple token might contain escaped delimiters (as \, or \;). The
|
* delimiters (as \, or \;). The token is finished when one of the following conditions become true:
|
||||||
* token is finished when one of the following conditions become true:
|
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>end of line has been reached (EORECORD)</li>
|
* <li>end of line has been reached (EORECORD)</li>
|
||||||
* <li>end of stream has been reached (EOF)</li>
|
* <li>end of stream has been reached (EOF)</li>
|
||||||
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* @param tkn the current token
|
* @param tkn
|
||||||
* @param c the current character
|
* the current token
|
||||||
|
* @param c
|
||||||
|
* the current character
|
||||||
* @return the filled token
|
* @return the filled token
|
||||||
* @throws IOException on stream access error
|
* @throws IOException
|
||||||
|
* on stream access error
|
||||||
*/
|
*/
|
||||||
private Token simpleTokenLexer(Token tkn, int c) throws IOException {
|
private Token simpleTokenLexer(Token tkn, int c) throws IOException {
|
||||||
// Faster to use while(true)+break than while(tkn.type == INVALID)
|
// Faster to use while(true)+break than while(tkn.type == INVALID)
|
||||||
|
@ -167,24 +169,22 @@ class CSVLexer extends Lexer {
|
||||||
/**
|
/**
|
||||||
* An encapsulated token lexer
|
* An encapsulated token lexer
|
||||||
* <p/>
|
* <p/>
|
||||||
* Encapsulated tokens are surrounded by the given encapsulating-string.
|
* Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included
|
||||||
* The encapsulator itself might be included in the token using a
|
* in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
|
||||||
* doubling syntax (as "", '') or using escaping (as in \", \').
|
* an encapsulated token are ignored. The token is finished when one of the following conditions become true:
|
||||||
* Whitespaces before and after an encapsulated token are ignored.
|
|
||||||
* The token is finished when one of the following conditions become true:
|
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:</li>
|
* <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:</li>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>delimiter (TOKEN)</li>
|
* <li>delimiter (TOKEN)</li>
|
||||||
* <li>end of line (EORECORD)</li>
|
* <li>end of line (EORECORD)</li>
|
||||||
* </ul>
|
|
||||||
* <li>end of stream has been reached (EOF)</li>
|
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
* <li>end of stream has been reached (EOF)</li> </ul>
|
||||||
* @param tkn the current token
|
*
|
||||||
|
* @param tkn
|
||||||
|
* the current token
|
||||||
* @return a valid token object
|
* @return a valid token object
|
||||||
* @throws IOException on invalid state:
|
* @throws IOException
|
||||||
* EOF before closing encapsulator or invalid character before delimiter or EOL
|
* on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL
|
||||||
*/
|
*/
|
||||||
private Token encapsulatedTokenLexer(Token tkn) throws IOException {
|
private Token encapsulatedTokenLexer(Token tkn) throws IOException {
|
||||||
// save current line number in case needed for IOE
|
// save current line number in case needed for IOE
|
||||||
|
@ -216,13 +216,15 @@ class CSVLexer extends Lexer {
|
||||||
return tkn;
|
return tkn;
|
||||||
} else if (!isWhitespace(c)) {
|
} else if (!isWhitespace(c)) {
|
||||||
// error invalid char between token and next delimiter
|
// error invalid char between token and next delimiter
|
||||||
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
|
throw new IOException("(line " + getLineNumber()
|
||||||
|
+ ") invalid char between encapsulated token and delimiter");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (isEndOfFile(c)) {
|
} else if (isEndOfFile(c)) {
|
||||||
// error condition (end of file before end of token)
|
// error condition (end of file before end of token)
|
||||||
throw new IOException("(startline " + startLineNumber + ") EOF reached before encapsulated token finished");
|
throw new IOException("(startline " + startLineNumber
|
||||||
|
+ ") EOF reached before encapsulated token finished");
|
||||||
} else {
|
} else {
|
||||||
// consume character
|
// consume character
|
||||||
tkn.content.append((char) c);
|
tkn.content.append((char) c);
|
||||||
|
|
Loading…
Reference in New Issue