Fix Checkstyle: Format for 120 line length.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1383598 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8608520aa1
commit
6c1b0fa1c4
|
@ -24,7 +24,7 @@ import java.io.StringWriter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The format specification of a CSV file.
|
* The format specification of a CSV file.
|
||||||
*
|
*
|
||||||
* This class is immutable.
|
* This class is immutable.
|
||||||
*/
|
*/
|
||||||
public class CSVFormat implements Serializable {
|
public class CSVFormat implements Serializable {
|
||||||
|
@ -95,7 +95,7 @@ public class CSVFormat implements Serializable {
|
||||||
* locale dependent, it might be necessary to customize this format to accommodate your regional settings.
|
* locale dependent, it might be necessary to customize this format to accommodate your regional settings.
|
||||||
* <p/>
|
* <p/>
|
||||||
* For example for parsing or generating a CSV file on a French system the following format will be used:
|
* For example for parsing or generating a CSV file on a French system the following format will be used:
|
||||||
*
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
* CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
|
* CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
|
||||||
* </pre>
|
* </pre>
|
||||||
|
@ -120,8 +120,8 @@ public class CSVFormat implements Serializable {
|
||||||
* a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters
|
* a tab-delimited format with a LF character as the line separator. Values are not quoted and special characters
|
||||||
* are escaped with '\'.
|
* are escaped with '\'.
|
||||||
*
|
*
|
||||||
* @see <a
|
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
|
||||||
* href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
|
* http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
|
||||||
*/
|
*/
|
||||||
public static final CSVFormat MYSQL =
|
public static final CSVFormat MYSQL =
|
||||||
PRISTINE
|
PRISTINE
|
||||||
|
@ -132,7 +132,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a customized CSV format.
|
* Creates a customized CSV format.
|
||||||
*
|
*
|
||||||
* @param delimiter
|
* @param delimiter
|
||||||
* the char used for value separation
|
* the char used for value separation
|
||||||
* @param encapsulator
|
* @param encapsulator
|
||||||
|
@ -167,10 +167,10 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the given character is a line break character.
|
* Returns true if the given character is a line break character.
|
||||||
*
|
*
|
||||||
* @param c
|
* @param c
|
||||||
* the character to check
|
* the character to check
|
||||||
*
|
*
|
||||||
* @return true if <code>c</code> is a line break character
|
* @return true if <code>c</code> is a line break character
|
||||||
*/
|
*/
|
||||||
private static boolean isLineBreak(char c) {
|
private static boolean isLineBreak(char c) {
|
||||||
|
@ -209,7 +209,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the character delimiting the values (typically ';', ',' or '\t').
|
* Returns the character delimiting the values (typically ';', ',' or '\t').
|
||||||
*
|
*
|
||||||
* @return the delimiter character
|
* @return the delimiter character
|
||||||
*/
|
*/
|
||||||
public char getDelimiter() {
|
public char getDelimiter() {
|
||||||
|
@ -218,7 +218,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified delimiter character.
|
* Returns a copy of this format using the specified delimiter character.
|
||||||
*
|
*
|
||||||
* @param delimiter
|
* @param delimiter
|
||||||
* the delimiter character
|
* the delimiter character
|
||||||
* @return A copy of this format using the specified delimiter character
|
* @return A copy of this format using the specified delimiter character
|
||||||
|
@ -236,7 +236,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the character used to encapsulate values containing special characters.
|
* Returns the character used to encapsulate values containing special characters.
|
||||||
*
|
*
|
||||||
* @return the encapsulator character
|
* @return the encapsulator character
|
||||||
*/
|
*/
|
||||||
public char getEncapsulator() {
|
public char getEncapsulator() {
|
||||||
|
@ -245,7 +245,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified encapsulator character.
|
* Returns a copy of this format using the specified encapsulator character.
|
||||||
*
|
*
|
||||||
* @param encapsulator
|
* @param encapsulator
|
||||||
* the encapsulator character
|
* the encapsulator character
|
||||||
* @return A copy of this format using the specified encapsulator character
|
* @return A copy of this format using the specified encapsulator character
|
||||||
|
@ -263,7 +263,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns whether an encapsulator has been defined.
|
* Returns whether an encapsulator has been defined.
|
||||||
*
|
*
|
||||||
* @return {@code true} if an encapsulator is defined
|
* @return {@code true} if an encapsulator is defined
|
||||||
*/
|
*/
|
||||||
public boolean isEncapsulating() {
|
public boolean isEncapsulating() {
|
||||||
|
@ -272,7 +272,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the character marking the start of a line comment.
|
* Returns the character marking the start of a line comment.
|
||||||
*
|
*
|
||||||
* @return the comment start marker.
|
* @return the comment start marker.
|
||||||
*/
|
*/
|
||||||
public char getCommentStart() {
|
public char getCommentStart() {
|
||||||
|
@ -281,9 +281,9 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified character as the comment start marker.
|
* Returns a copy of this format using the specified character as the comment start marker.
|
||||||
*
|
*
|
||||||
* Note that the comment introducer character is only recognised at the start of a line.
|
* Note that the comment introducer character is only recognised at the start of a line.
|
||||||
*
|
*
|
||||||
* @param commentStart
|
* @param commentStart
|
||||||
* the comment start marker
|
* the comment start marker
|
||||||
* @return A copy of this format using the specified character as the comment start marker
|
* @return A copy of this format using the specified character as the comment start marker
|
||||||
|
@ -301,9 +301,9 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Specifies whether comments are supported by this format.
|
* Specifies whether comments are supported by this format.
|
||||||
*
|
*
|
||||||
* Note that the comment introducer character is only recognised at the start of a line.
|
* Note that the comment introducer character is only recognised at the start of a line.
|
||||||
*
|
*
|
||||||
* @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise
|
* @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise
|
||||||
*/
|
*/
|
||||||
public boolean isCommentingEnabled() {
|
public boolean isCommentingEnabled() {
|
||||||
|
@ -312,7 +312,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the escape character.
|
* Returns the escape character.
|
||||||
*
|
*
|
||||||
* @return the escape character
|
* @return the escape character
|
||||||
*/
|
*/
|
||||||
public char getEscape() {
|
public char getEscape() {
|
||||||
|
@ -321,7 +321,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified escape character.
|
* Returns a copy of this format using the specified escape character.
|
||||||
*
|
*
|
||||||
* @param escape
|
* @param escape
|
||||||
* the escape character
|
* the escape character
|
||||||
* @return A copy of this format using the specified escape character
|
* @return A copy of this format using the specified escape character
|
||||||
|
@ -339,7 +339,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns whether escapes are being processed.
|
* Returns whether escapes are being processed.
|
||||||
*
|
*
|
||||||
* @return {@code true} if escapes are processed
|
* @return {@code true} if escapes are processed
|
||||||
*/
|
*/
|
||||||
public boolean isEscaping() {
|
public boolean isEscaping() {
|
||||||
|
@ -348,7 +348,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Specifies whether spaces around values are ignored when parsing input.
|
* Specifies whether spaces around values are ignored when parsing input.
|
||||||
*
|
*
|
||||||
* @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the
|
* @return <tt>true</tt> if spaces around values are ignored, <tt>false</tt> if they are treated as part of the
|
||||||
* value.
|
* value.
|
||||||
*/
|
*/
|
||||||
|
@ -358,7 +358,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format with the specified trimming behavior.
|
* Returns a copy of this format with the specified trimming behavior.
|
||||||
*
|
*
|
||||||
* @param surroundingSpacesIgnored
|
* @param surroundingSpacesIgnored
|
||||||
* the trimming behavior, <tt>true</tt> to remove the surrounding spaces, <tt>false</tt> to leave the
|
* the trimming behavior, <tt>true</tt> to remove the surrounding spaces, <tt>false</tt> to leave the
|
||||||
* spaces as is.
|
* spaces as is.
|
||||||
|
@ -371,7 +371,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Specifies whether empty lines between records are ignored when parsing input.
|
* Specifies whether empty lines between records are ignored when parsing input.
|
||||||
*
|
*
|
||||||
* @return <tt>true</tt> if empty lines between records are ignored, <tt>false</tt> if they are turned into empty
|
* @return <tt>true</tt> if empty lines between records are ignored, <tt>false</tt> if they are turned into empty
|
||||||
* records.
|
* records.
|
||||||
*/
|
*/
|
||||||
|
@ -381,7 +381,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format with the specified empty line skipping behavior.
|
* Returns a copy of this format with the specified empty line skipping behavior.
|
||||||
*
|
*
|
||||||
* @param emptyLinesIgnored
|
* @param emptyLinesIgnored
|
||||||
* the empty line skipping behavior, <tt>true</tt> to ignore the empty lines between the records,
|
* the empty line skipping behavior, <tt>true</tt> to ignore the empty lines between the records,
|
||||||
* <tt>false</tt> to translate empty lines to empty records.
|
* <tt>false</tt> to translate empty lines to empty records.
|
||||||
|
@ -394,7 +394,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the line separator delimiting output records.
|
* Returns the line separator delimiting output records.
|
||||||
*
|
*
|
||||||
* @return the line separator
|
* @return the line separator
|
||||||
*/
|
*/
|
||||||
public String getLineSeparator() {
|
public String getLineSeparator() {
|
||||||
|
@ -403,10 +403,10 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified output line separator.
|
* Returns a copy of this format using the specified output line separator.
|
||||||
*
|
*
|
||||||
* @param lineSeparator
|
* @param lineSeparator
|
||||||
* the line separator to be used for output.
|
* the line separator to be used for output.
|
||||||
*
|
*
|
||||||
* @return A copy of this format using the specified output line separator
|
* @return A copy of this format using the specified output line separator
|
||||||
*/
|
*/
|
||||||
public CSVFormat withLineSeparator(String lineSeparator) {
|
public CSVFormat withLineSeparator(String lineSeparator) {
|
||||||
|
@ -421,20 +421,20 @@ public class CSVFormat implements Serializable {
|
||||||
/**
|
/**
|
||||||
* Returns a copy of this format using the specified header. The header can either be parsed automatically from the
|
* Returns a copy of this format using the specified header. The header can either be parsed automatically from the
|
||||||
* input file with:
|
* input file with:
|
||||||
*
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
* CSVFormat format = aformat.withHeader();
|
* CSVFormat format = aformat.withHeader();
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* or specified manually with:
|
* or specified manually with:
|
||||||
*
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
* CSVFormat format = aformat.withHeader("name", "email", "phone");
|
* CSVFormat format = aformat.withHeader("name", "email", "phone");
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* @param header
|
* @param header
|
||||||
* the header, <tt>null</tt> if disabled, empty if parsed automatically, user specified otherwise.
|
* the header, <tt>null</tt> if disabled, empty if parsed automatically, user specified otherwise.
|
||||||
*
|
*
|
||||||
* @return A copy of this format using the specified header
|
* @return A copy of this format using the specified header
|
||||||
*/
|
*/
|
||||||
public CSVFormat withHeader(String... header) {
|
public CSVFormat withHeader(String... header) {
|
||||||
|
@ -444,7 +444,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses the specified content.
|
* Parses the specified content.
|
||||||
*
|
*
|
||||||
* @param in
|
* @param in
|
||||||
* the input stream
|
* the input stream
|
||||||
*/
|
*/
|
||||||
|
@ -454,7 +454,7 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Format the specified values.
|
* Format the specified values.
|
||||||
*
|
*
|
||||||
* @param values
|
* @param values
|
||||||
* the values to format
|
* the values to format
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -32,7 +32,7 @@ class CSVLexer extends Lexer {
|
||||||
* Returns the next token.
|
* Returns the next token.
|
||||||
* <p/>
|
* <p/>
|
||||||
* A token corresponds to a term, a record change or an end-of-file indicator.
|
* A token corresponds to a term, a record change or an end-of-file indicator.
|
||||||
*
|
*
|
||||||
* @param token
|
* @param token
|
||||||
* an existing Token object to reuse. The caller is responsible for initializing the Token.
|
* an existing Token object to reuse. The caller is responsible for initializing the Token.
|
||||||
* @return the next token found
|
* @return the next token found
|
||||||
|
@ -128,7 +128,7 @@ class CSVLexer extends Lexer {
|
||||||
* <li>end of stream has been reached (EOF)</li>
|
* <li>end of stream has been reached (EOF)</li>
|
||||||
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* @param tkn
|
* @param tkn
|
||||||
* the current token
|
* the current token
|
||||||
* @param c
|
* @param c
|
||||||
|
@ -179,7 +179,7 @@ class CSVLexer extends Lexer {
|
||||||
* <li>end of line (EORECORD)</li>
|
* <li>end of line (EORECORD)</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
* <li>end of stream has been reached (EOF)</li> </ul>
|
* <li>end of stream has been reached (EOF)</li> </ul>
|
||||||
*
|
*
|
||||||
* @param tkn
|
* @param tkn
|
||||||
* the current token
|
* the current token
|
||||||
* @return a valid token object
|
* @return a valid token object
|
||||||
|
|
|
@ -27,24 +27,29 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
|
|
||||||
import static org.apache.commons.csv.Token.Type.*;
|
import static org.apache.commons.csv.Token.Type.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses CSV files according to the specified configuration.
|
* Parses CSV files according to the specified configuration.
|
||||||
*
|
*
|
||||||
* Because CSV appears in many different dialects, the parser supports many
|
* Because CSV appears in many different dialects, the parser supports many configuration settings by allowing the
|
||||||
* configuration settings by allowing the specification of a {@link CSVFormat}.
|
* specification of a {@link CSVFormat}.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Parsing of a csv-string having tabs as separators, '"' as an optional value encapsulator, and comments starting with
|
||||||
|
* '#':
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
* <p>Parsing of a csv-string having tabs as separators,
|
|
||||||
* '"' as an optional value encapsulator, and comments starting with '#':</p>
|
|
||||||
* <pre>
|
* <pre>
|
||||||
* CSVFormat format = new CSVFormat('\t', '"', '#');
|
* CSVFormat format = new CSVFormat('\t', '"', '#');
|
||||||
* Reader in = new StringReader("a\tb\nc\td");
|
* Reader in = new StringReader("a\tb\nc\td");
|
||||||
* List<CSVRecord> records = new CSVParser(in, format).getRecords();
|
* List<CSVRecord> records = new CSVParser(in, format).getRecords();
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* <p>Parsing of a csv-string in Excel CSV format, using a for-each loop:</p>
|
* <p>
|
||||||
|
* Parsing of a csv-string in Excel CSV format, using a for-each loop:
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
* Reader in = new StringReader("a;b\nc;d");
|
* Reader in = new StringReader("a;b\nc;d");
|
||||||
* CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
|
* CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
|
||||||
|
@ -54,11 +59,12 @@ import static org.apache.commons.csv.Token.Type.*;
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* Internal parser state is completely covered by the format
|
* Internal parser state is completely covered by the format and the reader-state.
|
||||||
* and the reader-state.</p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <p>see <a href="package-summary.html">package documentation</a>
|
* <p>
|
||||||
* for more details</p>
|
* see <a href="package-summary.html">package documentation</a> for more details
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public class CSVParser implements Iterable<CSVRecord> {
|
public class CSVParser implements Iterable<CSVRecord> {
|
||||||
|
|
||||||
|
@ -74,8 +80,10 @@ public class CSVParser implements Iterable<CSVRecord> {
|
||||||
/**
|
/**
|
||||||
* CSV parser using the default {@link CSVFormat}.
|
* CSV parser using the default {@link CSVFormat}.
|
||||||
*
|
*
|
||||||
* @param input a Reader containing "csv-formatted" input
|
* @param input
|
||||||
* @throws IllegalArgumentException thrown if the parameters of the format are inconsistent
|
* a Reader containing "csv-formatted" input
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the parameters of the format are inconsistent
|
||||||
*/
|
*/
|
||||||
public CSVParser(Reader input) throws IOException {
|
public CSVParser(Reader input) throws IOException {
|
||||||
this(input, CSVFormat.DEFAULT);
|
this(input, CSVFormat.DEFAULT);
|
||||||
|
@ -84,9 +92,12 @@ public class CSVParser implements Iterable<CSVRecord> {
|
||||||
/**
|
/**
|
||||||
* Customized CSV parser using the given {@link CSVFormat}
|
* Customized CSV parser using the given {@link CSVFormat}
|
||||||
*
|
*
|
||||||
* @param input a Reader containing "csv-formatted" input
|
* @param input
|
||||||
* @param format the CSVFormat used for CSV parsing
|
* a Reader containing "csv-formatted" input
|
||||||
* @throws IllegalArgumentException thrown if the parameters of the format are inconsistent
|
* @param format
|
||||||
|
* the CSVFormat used for CSV parsing
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the parameters of the format are inconsistent
|
||||||
*/
|
*/
|
||||||
public CSVParser(Reader input, CSVFormat format) throws IOException {
|
public CSVParser(Reader input, CSVFormat format) throws IOException {
|
||||||
format.validate();
|
format.validate();
|
||||||
|
@ -99,23 +110,26 @@ public class CSVParser implements Iterable<CSVRecord> {
|
||||||
/**
|
/**
|
||||||
* Customized CSV parser using the given {@link CSVFormat}
|
* Customized CSV parser using the given {@link CSVFormat}
|
||||||
*
|
*
|
||||||
* @param input a String containing "csv-formatted" input
|
* @param input
|
||||||
* @param format the CSVFormat used for CSV parsing
|
* a String containing "csv-formatted" input
|
||||||
* @throws IllegalArgumentException thrown if the parameters of the format are inconsistent
|
* @param format
|
||||||
|
* the CSVFormat used for CSV parsing
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the parameters of the format are inconsistent
|
||||||
*/
|
*/
|
||||||
public CSVParser(String input, CSVFormat format) throws IOException{
|
public CSVParser(String input, CSVFormat format) throws IOException {
|
||||||
this(new StringReader(input), format);
|
this(new StringReader(input), format);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses the CSV input according to the given format and returns the content
|
* Parses the CSV input according to the given format and returns the content as an array of {@link CSVRecord}
|
||||||
* as an array of {@link CSVRecord} entries.
|
* entries.
|
||||||
* <p/>
|
* <p/>
|
||||||
* The returned content starts at the current parse-position in the stream.
|
* The returned content starts at the current parse-position in the stream.
|
||||||
*
|
*
|
||||||
* @return list of {@link CSVRecord} entries, may be empty
|
* @return list of {@link CSVRecord} entries, may be empty
|
||||||
* @throws IOException on parse error or input read-failure
|
* @throws IOException
|
||||||
|
* on parse error or input read-failure
|
||||||
*/
|
*/
|
||||||
public List<CSVRecord> getRecords() throws IOException {
|
public List<CSVRecord> getRecords() throws IOException {
|
||||||
List<CSVRecord> records = new ArrayList<CSVRecord>();
|
List<CSVRecord> records = new ArrayList<CSVRecord>();
|
||||||
|
@ -130,7 +144,8 @@ public class CSVParser implements Iterable<CSVRecord> {
|
||||||
* Parses the next record from the current point in the stream.
|
* Parses the next record from the current point in the stream.
|
||||||
*
|
*
|
||||||
* @return the record as an array of values, or <tt>null</tt> if the end of the stream has been reached
|
* @return the record as an array of values, or <tt>null</tt> if the end of the stream has been reached
|
||||||
* @throws IOException on parse error or input read-failure
|
* @throws IOException
|
||||||
|
* on parse error or input read-failure
|
||||||
*/
|
*/
|
||||||
CSVRecord getRecord() throws IOException {
|
CSVRecord getRecord() throws IOException {
|
||||||
CSVRecord result = new CSVRecord(null, headerMapping, null);
|
CSVRecord result = new CSVRecord(null, headerMapping, null);
|
||||||
|
@ -140,36 +155,36 @@ public class CSVParser implements Iterable<CSVRecord> {
|
||||||
reusableToken.reset();
|
reusableToken.reset();
|
||||||
lexer.nextToken(reusableToken);
|
lexer.nextToken(reusableToken);
|
||||||
switch (reusableToken.type) {
|
switch (reusableToken.type) {
|
||||||
case TOKEN:
|
case TOKEN:
|
||||||
|
record.add(reusableToken.content.toString());
|
||||||
|
break;
|
||||||
|
case EORECORD:
|
||||||
|
record.add(reusableToken.content.toString());
|
||||||
|
break;
|
||||||
|
case EOF:
|
||||||
|
if (reusableToken.isReady) {
|
||||||
record.add(reusableToken.content.toString());
|
record.add(reusableToken.content.toString());
|
||||||
break;
|
} else {
|
||||||
case EORECORD:
|
result = null;
|
||||||
record.add(reusableToken.content.toString());
|
}
|
||||||
break;
|
break;
|
||||||
case EOF:
|
case INVALID:
|
||||||
if (reusableToken.isReady) {
|
throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
|
||||||
record.add(reusableToken.content.toString());
|
case COMMENT: // Ignored currently
|
||||||
} else {
|
if (sb == null) { // first comment for this record
|
||||||
result = null;
|
sb = new StringBuilder();
|
||||||
}
|
} else {
|
||||||
break;
|
sb.append("\n");
|
||||||
case INVALID:
|
}
|
||||||
throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
|
sb.append(reusableToken.content);
|
||||||
case COMMENT: // Ignored currently
|
reusableToken.type = TOKEN; // Read another token
|
||||||
if (sb == null) { // first comment for this record
|
break;
|
||||||
sb = new StringBuilder();
|
|
||||||
} else {
|
|
||||||
sb.append("\n");
|
|
||||||
}
|
|
||||||
sb.append(reusableToken.content);
|
|
||||||
reusableToken.type = TOKEN; // Read another token
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
} while (reusableToken.type == TOKEN);
|
} while (reusableToken.type == TOKEN);
|
||||||
|
|
||||||
if (!record.isEmpty()) {
|
if (!record.isEmpty()) {
|
||||||
result = new CSVRecord(record.toArray(new String[record.size()]), headerMapping,
|
result = new CSVRecord(record.toArray(new String[record.size()]), headerMapping, sb == null ? null
|
||||||
sb == null ? null : sb.toString());
|
: sb.toString());
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -204,8 +219,8 @@ public class CSVParser implements Iterable<CSVRecord> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an iterator on the records. IOExceptions occurring
|
* Returns an iterator on the records. IOExceptions occurring during the iteration are wrapped in a
|
||||||
* during the iteration are wrapped in a RuntimeException.
|
* RuntimeException.
|
||||||
*/
|
*/
|
||||||
public Iterator<CSVRecord> iterator() {
|
public Iterator<CSVRecord> iterator() {
|
||||||
return new Iterator<CSVRecord>() {
|
return new Iterator<CSVRecord>() {
|
||||||
|
@ -251,8 +266,7 @@ public class CSVParser implements Iterable<CSVRecord> {
|
||||||
/**
|
/**
|
||||||
* Returns the current line number in the input stream.
|
* Returns the current line number in the input stream.
|
||||||
* <p/>
|
* <p/>
|
||||||
* ATTENTION: in case your csv has multiline-values the returned
|
* ATTENTION: in case your csv has multiline-values the returned number does not correspond to the record-number
|
||||||
* number does not correspond to the record-number
|
|
||||||
*
|
*
|
||||||
* @return current line number
|
* @return current line number
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -35,12 +35,15 @@ public class CSVPrinter {
|
||||||
/**
|
/**
|
||||||
* Create a printer that will print values to the given stream following the CSVFormat.
|
* Create a printer that will print values to the given stream following the CSVFormat.
|
||||||
* <p/>
|
* <p/>
|
||||||
* Currently, only a pure encapsulation format or a pure escaping format
|
* Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats
|
||||||
* is supported. Hybrid formats (encapsulation and escaping with a different character) are not supported.
|
* (encapsulation and escaping with a different character) are not supported.
|
||||||
*
|
*
|
||||||
* @param out stream to which to print.
|
* @param out
|
||||||
* @param format the CSV format. If null the default format is used ({@link CSVFormat#DEFAULT})
|
* stream to which to print.
|
||||||
* @throws IllegalArgumentException thrown if the parameters of the format are inconsistent
|
* @param format
|
||||||
|
* the CSV format. If null the default format is used ({@link CSVFormat#DEFAULT})
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the parameters of the format are inconsistent
|
||||||
*/
|
*/
|
||||||
public CSVPrinter(Appendable out, CSVFormat format) {
|
public CSVPrinter(Appendable out, CSVFormat format) {
|
||||||
this.out = out;
|
this.out = out;
|
||||||
|
@ -49,7 +52,7 @@ public class CSVPrinter {
|
||||||
}
|
}
|
||||||
|
|
||||||
// ======================================================
|
// ======================================================
|
||||||
// printing implementation
|
// printing implementation
|
||||||
// ======================================================
|
// ======================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -72,11 +75,11 @@ public class CSVPrinter {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Print a single line of comma separated values.
|
* Print a single line of comma separated values. The values will be quoted if needed. Quotes and newLine
|
||||||
* The values will be quoted if needed. Quotes and
|
* characters will be escaped.
|
||||||
* newLine characters will be escaped.
|
|
||||||
*
|
*
|
||||||
* @param values values to be outputted.
|
* @param values
|
||||||
|
* values to be outputted.
|
||||||
*/
|
*/
|
||||||
public void println(String... values) throws IOException {
|
public void println(String... values) throws IOException {
|
||||||
for (String value : values) {
|
for (String value : values) {
|
||||||
|
@ -85,16 +88,15 @@ public class CSVPrinter {
|
||||||
println();
|
println();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Put a comment on a new line among the comma separated values. Comments
|
* Put a comment on a new line among the comma separated values. Comments will always begin on a new line and
|
||||||
* will always begin on a new line and occupy at least one full line. The
|
* occupy at least one full line. The character specified to start comments and a space will be inserted at the
|
||||||
* character specified to start comments and a space will be inserted at
|
* beginning of each new line in the comment.
|
||||||
* the beginning of each new line in the comment.
|
|
||||||
* <p/>
|
* <p/>
|
||||||
* If comments are disabled in the current CSV format this method does nothing.
|
* If comments are disabled in the current CSV format this method does nothing.
|
||||||
*
|
*
|
||||||
* @param comment the comment to output
|
* @param comment
|
||||||
|
* the comment to output
|
||||||
*/
|
*/
|
||||||
public void printComment(String comment) throws IOException {
|
public void printComment(String comment) throws IOException {
|
||||||
if (!format.isCommentingEnabled()) {
|
if (!format.isCommentingEnabled()) {
|
||||||
|
@ -108,25 +110,24 @@ public class CSVPrinter {
|
||||||
for (int i = 0; i < comment.length(); i++) {
|
for (int i = 0; i < comment.length(); i++) {
|
||||||
char c = comment.charAt(i);
|
char c = comment.charAt(i);
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '\r':
|
case '\r':
|
||||||
if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') {
|
if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') {
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
//$FALL-THROUGH$ break intentionally excluded.
|
//$FALL-THROUGH$ break intentionally excluded.
|
||||||
case '\n':
|
case '\n':
|
||||||
println();
|
println();
|
||||||
out.append(format.getCommentStart());
|
out.append(format.getCommentStart());
|
||||||
out.append(' ');
|
out.append(' ');
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
out.append(c);
|
out.append(c);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
println();
|
println();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void print(CharSequence value, int offset, int len) throws IOException {
|
private void print(CharSequence value, int offset, int len) throws IOException {
|
||||||
if (format.isEncapsulating()) {
|
if (format.isEncapsulating()) {
|
||||||
printAndEncapsulate(value, offset, len);
|
printAndEncapsulate(value, offset, len);
|
||||||
|
@ -185,7 +186,7 @@ public class CSVPrinter {
|
||||||
}
|
}
|
||||||
|
|
||||||
void printAndEncapsulate(CharSequence value, int offset, int len) throws IOException {
|
void printAndEncapsulate(CharSequence value, int offset, int len) throws IOException {
|
||||||
boolean first = newLine; // is this the first value on this line?
|
boolean first = newLine; // is this the first value on this line?
|
||||||
boolean quote = false;
|
boolean quote = false;
|
||||||
int start = offset;
|
int start = offset;
|
||||||
int pos = offset;
|
int pos = offset;
|
||||||
|
@ -208,16 +209,12 @@ public class CSVPrinter {
|
||||||
char c = value.charAt(pos);
|
char c = value.charAt(pos);
|
||||||
|
|
||||||
// Hmmm, where did this rule come from?
|
// Hmmm, where did this rule come from?
|
||||||
if (first
|
if (first && (c < '0' || (c > '9' && c < 'A') || (c > 'Z' && c < 'a') || (c > 'z'))) {
|
||||||
&& (c < '0'
|
|
||||||
|| (c > '9' && c < 'A')
|
|
||||||
|| (c > 'Z' && c < 'a')
|
|
||||||
|| (c > 'z'))) {
|
|
||||||
quote = true;
|
quote = true;
|
||||||
// } else if (c == ' ' || c == '\f' || c == '\t') {
|
// } else if (c == ' ' || c == '\f' || c == '\t') {
|
||||||
} else if (c <= '#') {
|
} else if (c <= '#') {
|
||||||
// Some other chars at the start of a value caused the parser to fail, so for now
|
// Some other chars at the start of a value caused the parser to fail, so for now
|
||||||
// encapsulate if we start in anything less than '#'. We are being conservative
|
// encapsulate if we start in anything less than '#'. We are being conservative
|
||||||
// by including the default comment char too.
|
// by including the default comment char too.
|
||||||
quote = true;
|
quote = true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -274,10 +271,11 @@ public class CSVPrinter {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Print the string as the next value on the line. The value
|
* Print the string as the next value on the line. The value will be escaped or encapsulated as needed if
|
||||||
* will be escaped or encapsulated as needed if checkForEscape==true
|
* checkForEscape==true
|
||||||
*
|
*
|
||||||
* @param value value to be outputted.
|
* @param value
|
||||||
|
* value to be outputted.
|
||||||
*/
|
*/
|
||||||
public void print(String value, boolean checkForEscape) throws IOException {
|
public void print(String value, boolean checkForEscape) throws IOException {
|
||||||
if (value == null) {
|
if (value == null) {
|
||||||
|
@ -295,10 +293,10 @@ public class CSVPrinter {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Print the string as the next value on the line. The value
|
* Print the string as the next value on the line. The value will be escaped or encapsulated as needed.
|
||||||
* will be escaped or encapsulated as needed.
|
|
||||||
*
|
*
|
||||||
* @param value value to be outputted.
|
* @param value
|
||||||
|
* value to be outputted.
|
||||||
*/
|
*/
|
||||||
public void print(String value) throws IOException {
|
public void print(String value) throws IOException {
|
||||||
print(value, true);
|
print(value, true);
|
||||||
|
|
|
@ -49,7 +49,8 @@ public class CSVRecord implements Serializable, Iterable<String> {
|
||||||
/**
|
/**
|
||||||
* Returns a value by index.
|
* Returns a value by index.
|
||||||
*
|
*
|
||||||
* @param i the index of the column retrieved
|
* @param i
|
||||||
|
* the index of the column retrieved
|
||||||
*/
|
*/
|
||||||
public String get(int i) {
|
public String get(int i) {
|
||||||
return values[i];
|
return values[i];
|
||||||
|
@ -58,9 +59,11 @@ public class CSVRecord implements Serializable, Iterable<String> {
|
||||||
/**
|
/**
|
||||||
* Returns a value by name.
|
* Returns a value by name.
|
||||||
*
|
*
|
||||||
* @param name the name of the column to be retrieved
|
* @param name
|
||||||
|
* the name of the column to be retrieved
|
||||||
* @return the column value, or {@code null} if the column name is not found
|
* @return the column value, or {@code null} if the column name is not found
|
||||||
* @throws IllegalStateException if no header mapping was provided
|
* @throws IllegalStateException
|
||||||
|
* if no header mapping was provided
|
||||||
*/
|
*/
|
||||||
public String get(String name) {
|
public String get(String name) {
|
||||||
if (mapping == null) {
|
if (mapping == null) {
|
||||||
|
@ -83,6 +86,7 @@ public class CSVRecord implements Serializable, Iterable<String> {
|
||||||
public String getComment() {
|
public String getComment() {
|
||||||
return comment;
|
return comment;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the number of values in this record.
|
* Returns the number of values in this record.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -22,11 +22,9 @@ import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A special reader decorator which supports more
|
* A special reader decorator which supports more sophisticated access to the underlying reader object.
|
||||||
* sophisticated access to the underlying reader object.
|
|
||||||
* <p>
|
* <p>
|
||||||
* In particular the reader supports a look-ahead option,
|
* In particular the reader supports a look-ahead option, which allows you to see the next char returned by
|
||||||
* which allows you to see the next char returned by
|
|
||||||
* {@link #read()}.
|
* {@link #read()}.
|
||||||
*/
|
*/
|
||||||
class ExtendedBufferedReader extends BufferedReader {
|
class ExtendedBufferedReader extends BufferedReader {
|
||||||
|
@ -65,12 +63,10 @@ class ExtendedBufferedReader extends BufferedReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the last character that was read as an integer (0 to 65535). This
|
* Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
|
||||||
* will be the last character returned by any of the read methods. This will
|
* any of the read methods. This will not include a character read using the {@link #peek()} method. If no
|
||||||
* not include a character read using the {@link #peek()} method. If no
|
* character has been read then this will return {@link #UNDEFINED}. If the end of the stream was reached on the
|
||||||
* character has been read then this will return {@link #UNDEFINED}. If the
|
* last read then this will return {@link #END_OF_STREAM}.
|
||||||
* end of the stream was reached on the last read then this will return
|
|
||||||
* {@link #END_OF_STREAM}.
|
|
||||||
*
|
*
|
||||||
* @return the last character that was read
|
* @return the last character that was read
|
||||||
*/
|
*/
|
||||||
|
@ -91,7 +87,7 @@ class ExtendedBufferedReader extends BufferedReader {
|
||||||
for (int i = offset; i < offset + len; i++) {
|
for (int i = offset; i < offset + len; i++) {
|
||||||
char ch = buf[i];
|
char ch = buf[i];
|
||||||
if (ch == LF) {
|
if (ch == LF) {
|
||||||
if (CR != (i > 0 ? buf[i-1]: lastChar)) {
|
if (CR != (i > 0 ? buf[i - 1] : lastChar)) {
|
||||||
lineCounter++;
|
lineCounter++;
|
||||||
}
|
}
|
||||||
} else if (ch == CR) {
|
} else if (ch == CR) {
|
||||||
|
@ -109,14 +105,12 @@ class ExtendedBufferedReader extends BufferedReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calls {@link BufferedReader#readLine()} which drops the line terminator(s).
|
* Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
|
||||||
* This method should only be called when processing a comment, otherwise
|
* when processing a comment, otherwise information can be lost.
|
||||||
* information can be lost.
|
|
||||||
* <p>
|
* <p>
|
||||||
* Increments {@link #lineCounter}
|
* Increments {@link #lineCounter}
|
||||||
* <p>
|
* <p>
|
||||||
* Sets {@link #lastChar} to {@link #END_OF_STREAM} at EOF,
|
* Sets {@link #lastChar} to {@link #END_OF_STREAM} at EOF, otherwise to LF
|
||||||
* otherwise to LF
|
|
||||||
*
|
*
|
||||||
* @return the line that was read, or null if reached EOF.
|
* @return the line that was read, or null if reached EOF.
|
||||||
*/
|
*/
|
||||||
|
@ -135,12 +129,13 @@ class ExtendedBufferedReader extends BufferedReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the next character in the current reader without consuming it. So
|
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
|
||||||
* the next call to {@link #read()} will still return this value.
|
* still return this value.
|
||||||
*
|
*
|
||||||
* @return the next character
|
* @return the next character
|
||||||
*
|
*
|
||||||
* @throws IOException if there is an error in reading
|
* @throws IOException
|
||||||
|
* if there is an error in reading
|
||||||
*/
|
*/
|
||||||
int lookAhead() throws IOException {
|
int lookAhead() throws IOException {
|
||||||
super.mark(1);
|
super.mark(1);
|
||||||
|
|
|
@ -64,20 +64,20 @@ abstract class Lexer {
|
||||||
// assume c is the escape char (normally a backslash)
|
// assume c is the escape char (normally a backslash)
|
||||||
int c = in.read();
|
int c = in.read();
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'r':
|
case 'r':
|
||||||
return '\r';
|
return '\r';
|
||||||
case 'n':
|
case 'n':
|
||||||
return '\n';
|
return '\n';
|
||||||
case 't':
|
case 't':
|
||||||
return '\t';
|
return '\t';
|
||||||
case 'b':
|
case 'b':
|
||||||
return '\b';
|
return '\b';
|
||||||
case 'f':
|
case 'f':
|
||||||
return '\f';
|
return '\f';
|
||||||
case ExtendedBufferedReader.END_OF_STREAM:
|
case ExtendedBufferedReader.END_OF_STREAM:
|
||||||
throw new IOException("EOF whilst processing escape sequence");
|
throw new IOException("EOF whilst processing escape sequence");
|
||||||
default:
|
default:
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,8 +99,7 @@ abstract class Lexer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Greedy - accepts \n, \r and \r\n.
|
* Greedy - accepts \n, \r and \r\n. This checker silently consumes the second control-character...
|
||||||
* This checker silently consumes the second control-character...
|
|
||||||
*
|
*
|
||||||
* @return true if the given character is a line-terminator
|
* @return true if the given character is a line-terminator
|
||||||
*/
|
*/
|
||||||
|
@ -114,8 +113,7 @@ abstract class Lexer {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if the current character represents the start of a line:
|
* Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
|
||||||
* a CR, LF or is at the start of the file.
|
|
||||||
*
|
*
|
||||||
* @param c
|
* @param c
|
||||||
* @return true if the character is at the start of a line.
|
* @return true if the character is at the start of a line.
|
||||||
|
@ -123,6 +121,7 @@ abstract class Lexer {
|
||||||
boolean isStartOfLine(int c) {
|
boolean isStartOfLine(int c) {
|
||||||
return c == '\n' || c == '\r' || c == ExtendedBufferedReader.UNDEFINED;
|
return c == '\n' || c == '\r' || c == ExtendedBufferedReader.UNDEFINED;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if the given character indicates end of file
|
* @return true if the given character indicates end of file
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -65,6 +65,6 @@ class Token {
|
||||||
// Provide toString method for IDE debugging
|
// Provide toString method for IDE debugging
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return type.name()+" ["+content.toString()+"]";
|
return type.name() + " [" + content.toString() + "]";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue