Merge branch 'apache:master' into CSV-147
This commit is contained in:
commit
156f1f5071
2
pom.xml
2
pom.xml
|
@ -20,7 +20,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.apache.commons</groupId>
|
||||||
<artifactId>commons-parent</artifactId>
|
<artifactId>commons-parent</artifactId>
|
||||||
<version>60</version>
|
<version>61</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>commons-csv</artifactId>
|
<artifactId>commons-csv</artifactId>
|
||||||
<version>1.10.1-SNAPSHOT</version>
|
<version>1.10.1-SNAPSHOT</version>
|
||||||
|
|
|
@ -50,9 +50,10 @@
|
||||||
<action type="fix" dev="ggregory" due-to="step-security-bot">[StepSecurity] CI: Harden GitHub Actions #329, #330.</action>
|
<action type="fix" dev="ggregory" due-to="step-security-bot">[StepSecurity] CI: Harden GitHub Actions #329, #330.</action>
|
||||||
<!-- UPDATE -->
|
<!-- UPDATE -->
|
||||||
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-io:commons-io: from 2.11.0 to 2.13.0.</action>
|
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-io:commons-io: from 2.11.0 to 2.13.0.</action>
|
||||||
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-parent from 57 to 60.</action>
|
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-parent from 57 to 61.</action>
|
||||||
<action type="update" dev="ggregory" due-to="Dependabot">Bump h2 from 2.1.214 to 2.2.220 #333.</action>
|
<action type="update" dev="ggregory" due-to="Dependabot">Bump h2 from 2.1.214 to 2.2.220 #333.</action>
|
||||||
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-lang3 from 3.12.0 to 3.13.0.</action>
|
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-lang3 from 3.12.0 to 3.13.0.</action>
|
||||||
|
<action type="update" dev="ggregory" due-to="Buddhi De Silva, Michael Osipov, Gary Gregory">Update exception message in CSVRecord#getNextRecord() #348.</action>
|
||||||
</release>
|
</release>
|
||||||
<release version="1.10.0" date="2023-01-28" description="Feature and bug fix release (Java 8)">
|
<release version="1.10.0" date="2023-01-28" description="Feature and bug fix release (Java 8)">
|
||||||
<!-- FIX -->
|
<!-- FIX -->
|
||||||
|
|
|
@ -477,7 +477,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* The header is also used by the {@link CSVPrinter}.
|
* The header is also used by the {@link CSVPrinter}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
|
* @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
|
||||||
* @return This instance.
|
* @return This instance.
|
||||||
*/
|
*/
|
||||||
public Builder setHeader(final Class<? extends Enum<?>> headerEnum) {
|
public Builder setHeader(final Class<? extends Enum<?>> headerEnum) {
|
||||||
|
@ -491,7 +491,7 @@ public final class CSVFormat implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the header from the result set metadata. The header can either be parsed automatically from the input file with:
|
* Sets the header from the result set metadata. The header can be parsed automatically from the input file with:
|
||||||
*
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
* builder.setHeader();
|
* builder.setHeader();
|
||||||
|
@ -506,7 +506,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* The header is also used by the {@link CSVPrinter}.
|
* The header is also used by the {@link CSVPrinter}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
|
* @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
|
||||||
* @return This instance.
|
* @return This instance.
|
||||||
* @throws SQLException if a database access error occurs or this method is called on a closed result set.
|
* @throws SQLException if a database access error occurs or this method is called on a closed result set.
|
||||||
*/
|
*/
|
||||||
|
@ -515,7 +515,7 @@ public final class CSVFormat implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the header from the result set metadata. The header can either be parsed automatically from the input file with:
|
* Sets the header from the result set metadata. The header can be parsed automatically from the input file with:
|
||||||
*
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
* builder.setHeader();
|
* builder.setHeader();
|
||||||
|
@ -530,7 +530,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* The header is also used by the {@link CSVPrinter}.
|
* The header is also used by the {@link CSVPrinter}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
|
* @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
|
||||||
* @return This instance.
|
* @return This instance.
|
||||||
* @throws SQLException if a database access error occurs or this method is called on a closed result set.
|
* @throws SQLException if a database access error occurs or this method is called on a closed result set.
|
||||||
*/
|
*/
|
||||||
|
@ -547,7 +547,7 @@ public final class CSVFormat implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the header to the given values. The header can either be parsed automatically from the input file with:
|
* Sets the header to the given values. The header can be parsed automatically from the input file with:
|
||||||
*
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
* builder.setHeader();
|
* builder.setHeader();
|
||||||
|
@ -562,7 +562,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* The header is also used by the {@link CSVPrinter}.
|
* The header is also used by the {@link CSVPrinter}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
|
* @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
|
||||||
* @return This instance.
|
* @return This instance.
|
||||||
*/
|
*/
|
||||||
public Builder setHeader(final String... header) {
|
public Builder setHeader(final String... header) {
|
||||||
|
@ -909,8 +909,8 @@ public final class CSVFormat implements Serializable {
|
||||||
false, false, false, DuplicateHeaderMode.ALLOW_ALL);
|
false, false, false, DuplicateHeaderMode.ALLOW_ALL);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
|
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent, it might be necessary
|
||||||
* to customize this format to accommodate to your regional settings.
|
* to customize this format to accommodate your regional settings.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* For example for parsing or generating a CSV file on a French system the following format will be used:
|
* For example for parsing or generating a CSV file on a French system the following format will be used:
|
||||||
|
@ -949,7 +949,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
|
* Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
|
* This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
|
||||||
* The default NULL string is {@code "\\N"}.
|
* The default NULL string is {@code "\\N"}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
|
@ -981,7 +981,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
|
* Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
|
* This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
|
||||||
* The default NULL string is {@code "\\N"}.
|
* The default NULL string is {@code "\\N"}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
|
@ -1084,7 +1084,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
|
* Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
|
* This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
|
||||||
* characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
|
* characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
|
@ -1121,7 +1121,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* Default Oracle format used by the SQL*Loader utility.
|
* Default Oracle format used by the SQL*Loader utility.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* This is a comma-delimited format with the system line separator character as the record separator.Values are
|
* This is a comma-delimited format with the system line separator character as the record separator. Values are
|
||||||
* double quoted when needed and special characters are escaped with {@code '"'}. The default NULL string is
|
* double quoted when needed and special characters are escaped with {@code '"'}. The default NULL string is
|
||||||
* {@code ""}. Values are trimmed.
|
* {@code ""}. Values are trimmed.
|
||||||
* </p>
|
* </p>
|
||||||
|
@ -1161,7 +1161,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* Default PostgreSQL CSV format used by the {@code COPY} operation.
|
* Default PostgreSQL CSV format used by the {@code COPY} operation.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
|
* This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special
|
||||||
* characters are not escaped. The default NULL string is {@code ""}.
|
* characters are not escaped. The default NULL string is {@code ""}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
|
@ -1199,7 +1199,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* Default PostgreSQL text format used by the {@code COPY} operation.
|
* Default PostgreSQL text format used by the {@code COPY} operation.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
|
* This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
|
||||||
* characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
|
* characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
|
@ -1890,7 +1890,7 @@ public final class CSVFormat implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests whether escape are being processed.
|
* Tests whether escapes are being processed.
|
||||||
*
|
*
|
||||||
* @return {@code true} if escapes are processed
|
* @return {@code true} if escapes are processed
|
||||||
*/
|
*/
|
||||||
|
@ -1899,7 +1899,7 @@ public final class CSVFormat implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests whether a nullString has been defined.
|
* Tests whether a null string has been defined.
|
||||||
*
|
*
|
||||||
* @return {@code true} if a nullString is defined
|
* @return {@code true} if a nullString is defined
|
||||||
*/
|
*/
|
||||||
|
@ -2009,7 +2009,7 @@ public final class CSVFormat implements Serializable {
|
||||||
if (object == null) {
|
if (object == null) {
|
||||||
out.append(value);
|
out.append(value);
|
||||||
} else if (isQuoteCharacterSet()) {
|
} else if (isQuoteCharacterSet()) {
|
||||||
// the original object is needed so can check for Number
|
// The original object is needed so we can check for Number
|
||||||
printWithQuotes(object, value, out, newRecord);
|
printWithQuotes(object, value, out, newRecord);
|
||||||
} else if (isEscapeCharacterSet()) {
|
} else if (isEscapeCharacterSet()) {
|
||||||
printWithEscapes(value, out);
|
printWithEscapes(value, out);
|
||||||
|
@ -2086,7 +2086,7 @@ public final class CSVFormat implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the record separator.
|
* Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing
|
* The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing
|
||||||
|
@ -2241,7 +2241,7 @@ public final class CSVFormat implements Serializable {
|
||||||
return;
|
return;
|
||||||
case MINIMAL:
|
case MINIMAL:
|
||||||
if (len <= 0) {
|
if (len <= 0) {
|
||||||
// always quote an empty token that is the first
|
// Always quote an empty token that is the first
|
||||||
// on the line, as it may be the only thing on the
|
// on the line, as it may be the only thing on the
|
||||||
// line. If it were not quoted in that case,
|
// line. If it were not quoted in that case,
|
||||||
// an empty line has no tokens.
|
// an empty line has no tokens.
|
||||||
|
@ -2279,7 +2279,7 @@ public final class CSVFormat implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!quote) {
|
if (!quote) {
|
||||||
// no encapsulation needed - write out the original value
|
// No encapsulation needed - write out the original value
|
||||||
out.append(charSeq, start, len);
|
out.append(charSeq, start, len);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2289,12 +2289,12 @@ public final class CSVFormat implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!quote) {
|
if (!quote) {
|
||||||
// no encapsulation needed - write out the original value
|
// No encapsulation needed - write out the original value
|
||||||
out.append(charSeq, start, len);
|
out.append(charSeq, start, len);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// we hit something that needed encapsulation
|
// We hit something that needed encapsulation
|
||||||
out.append(quoteChar);
|
out.append(quoteChar);
|
||||||
|
|
||||||
// Pick up where we left off: pos should be positioned on the first character that caused
|
// Pick up where we left off: pos should be positioned on the first character that caused
|
||||||
|
@ -2310,13 +2310,13 @@ public final class CSVFormat implements Serializable {
|
||||||
pos++;
|
pos++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// write the last segment
|
// Write the last segment
|
||||||
out.append(charSeq, start, pos);
|
out.append(charSeq, start, pos);
|
||||||
out.append(quoteChar);
|
out.append(quoteChar);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Always use quotes unless QuoteMode is NONE, so we not have to look ahead.
|
* Always use quotes unless QuoteMode is NONE, so we do not have to look ahead.
|
||||||
*
|
*
|
||||||
* @param reader What to print
|
* @param reader What to print
|
||||||
* @param appendable Where to print it
|
* @param appendable Where to print it
|
||||||
|
@ -2417,8 +2417,8 @@ public final class CSVFormat implements Serializable {
|
||||||
/**
|
/**
|
||||||
* Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary.
|
* Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary.
|
||||||
* <p>
|
* <p>
|
||||||
* Because an instance can be used for both writing an parsing, not all conditions can be tested here. For example allowMissingColumnNames is only used for
|
* Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used
|
||||||
* parsing, so it cannot be used here.
|
* for parsing, so it cannot be used here.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @throws IllegalArgumentException Thrown when any attribute is invalid or inconsistent with other attributes.
|
* @throws IllegalArgumentException Thrown when any attribute is invalid or inconsistent with other attributes.
|
||||||
|
@ -2570,7 +2570,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character.
|
* Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character.
|
||||||
*
|
*
|
||||||
* @param delimiter the delimiter character
|
* @param delimiter the delimiter character
|
||||||
* @return A new CSVFormat that is equal to this with the specified character as delimiter
|
* @return A new CSVFormat that is equal to this with the specified character as a delimiter
|
||||||
* @throws IllegalArgumentException thrown if the specified character is a line break
|
* @throws IllegalArgumentException thrown if the specified character is a line break
|
||||||
* @deprecated Use {@link Builder#setDelimiter(char)}
|
* @deprecated Use {@link Builder#setDelimiter(char)}
|
||||||
*/
|
*/
|
||||||
|
@ -2679,7 +2679,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* The header is also used by the {@link CSVPrinter}.
|
* The header is also used by the {@link CSVPrinter}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
|
* @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
|
||||||
* @return A new CSVFormat that is equal to this but with the specified header
|
* @return A new CSVFormat that is equal to this but with the specified header
|
||||||
* @throws SQLException if a database access error occurs or this method is called on a closed result set.
|
* @throws SQLException if a database access error occurs or this method is called on a closed result set.
|
||||||
* @since 1.1
|
* @since 1.1
|
||||||
|
@ -2735,7 +2735,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* The header is also used by the {@link CSVPrinter}.
|
* The header is also used by the {@link CSVPrinter}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
|
* @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
|
||||||
* @return A new CSVFormat that is equal to this but with the specified header
|
* @return A new CSVFormat that is equal to this but with the specified header
|
||||||
* @see Builder#setSkipHeaderRecord(boolean)
|
* @see Builder#setSkipHeaderRecord(boolean)
|
||||||
* @deprecated Use {@link Builder#setHeader(String...)}
|
* @deprecated Use {@link Builder#setHeader(String...)}
|
||||||
|
@ -2793,7 +2793,7 @@ public final class CSVFormat implements Serializable {
|
||||||
/**
|
/**
|
||||||
* Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
|
* Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
|
||||||
*
|
*
|
||||||
* @return A new CSVFormat that will ignore case header name.
|
* @return A new CSVFormat that will ignore the case of header names.
|
||||||
* @see Builder#setIgnoreHeaderCase(boolean)
|
* @see Builder#setIgnoreHeaderCase(boolean)
|
||||||
* @since 1.3
|
* @since 1.3
|
||||||
* @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)}
|
* @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)}
|
||||||
|
|
|
@ -53,7 +53,7 @@ import java.util.stream.StreamSupport;
|
||||||
* Because CSV appears in many different dialects, the parser supports many formats by allowing the
|
* Because CSV appears in many different dialects, the parser supports many formats by allowing the
|
||||||
* specification of a {@link CSVFormat}.
|
* specification of a {@link CSVFormat}.
|
||||||
*
|
*
|
||||||
* The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream.
|
* The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream.
|
||||||
*
|
*
|
||||||
* <h2>Creating instances</h2>
|
* <h2>Creating instances</h2>
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -105,13 +105,13 @@ import java.util.stream.StreamSupport;
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* If the predefined formats don't match the format at hands, custom formats can be defined. More information about
|
* If the predefined formats don't match the format at hand, custom formats can be defined. More information about
|
||||||
* customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
|
* customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <h2>Parsing into memory</h2>
|
* <h2>Parsing into memory</h2>
|
||||||
* <p>
|
* <p>
|
||||||
* If parsing record wise is not desired, the contents of the input can be read completely into memory.
|
* If parsing record-wise is not desired, the contents of the input can be read completely into memory.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
|
@ -126,14 +126,14 @@ import java.util.stream.StreamSupport;
|
||||||
*
|
*
|
||||||
* <ol>
|
* <ol>
|
||||||
* <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
|
* <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
|
||||||
* the input, those records will not end up in the in memory representation of your CSV data.</li>
|
* the input, those records will not end up in the in-memory representation of your CSV data.</li>
|
||||||
* <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're
|
* <li>Parsing into memory may consume a lot of system resources depending on the input. For example, if you're
|
||||||
* parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
|
* parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
|
||||||
* </ol>
|
* </ol>
|
||||||
*
|
*
|
||||||
* <h2>Notes</h2>
|
* <h2>Notes</h2>
|
||||||
* <p>
|
* <p>
|
||||||
* Internal parser state is completely covered by the format and the reader-state.
|
* The internal parser state is completely covered by the format and the reader state.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @see <a href="package-summary.html">package documentation for more details</a>
|
* @see <a href="package-summary.html">package documentation for more details</a>
|
||||||
|
@ -147,7 +147,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
||||||
try {
|
try {
|
||||||
return CSVParser.this.nextRecord();
|
return CSVParser.this.nextRecord();
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
throw new UncheckedIOException(e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
|
throw new UncheckedIOException("Exception reading next record: " + e.toString(), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -304,8 +304,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
||||||
return new CSVParser(reader, format);
|
return new CSVParser(reader, format);
|
||||||
}
|
}
|
||||||
|
|
||||||
// the following objects are shared to reduce garbage
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a parser for the given {@link String}.
|
* Creates a parser for the given {@link String}.
|
||||||
*
|
*
|
||||||
|
@ -423,7 +421,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
||||||
* @param recordNumber
|
* @param recordNumber
|
||||||
* The next record number to assign
|
* The next record number to assign
|
||||||
* @throws IllegalArgumentException
|
* @throws IllegalArgumentException
|
||||||
* If the parameters of the format are inconsistent or if either reader or format are null.
|
* If the parameters of the format are inconsistent or if either the reader or format is null.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* If there is a problem reading the header or skipping the first record
|
* If there is a problem reading the header or skipping the first record
|
||||||
* @since 1.1
|
* @since 1.1
|
||||||
|
@ -702,11 +700,11 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handle whether input is parsed as null
|
* Handles whether the input is parsed as null
|
||||||
*
|
*
|
||||||
* @param input
|
* @param input
|
||||||
* the cell data to be further processed
|
* the cell data to be further processed
|
||||||
* @return null if input is parsed as null, or input itself if input isn't parsed as null
|
* @return null if input is parsed as null, or input itself if the input isn't parsed as null
|
||||||
*/
|
*/
|
||||||
private String handleNull(final String input) {
|
private String handleNull(final String input) {
|
||||||
final boolean isQuoted = this.reusableToken.isQuoted;
|
final boolean isQuoted = this.reusableToken.isQuoted;
|
||||||
|
@ -773,7 +771,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
||||||
* Returns the record iterator.
|
* Returns the record iterator.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* An {@link IOException} caught during the iteration are re-thrown as an
|
* An {@link IOException} caught during the iteration is re-thrown as an
|
||||||
* {@link UncheckedIOException}.
|
* {@link UncheckedIOException}.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
@ -171,7 +171,7 @@ public final class CSVPrinter implements Flushable, Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prints a comment on a new line among the delimiter separated values.
|
* Prints a comment on a new line among the delimiter-separated values.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* Comments will always begin on a new line and occupy at least one full line. The character specified to start
|
* Comments will always begin on a new line and occupy at least one full line. The character specified to start
|
||||||
|
@ -184,7 +184,7 @@ public final class CSVPrinter implements Flushable, Closeable {
|
||||||
*
|
*
|
||||||
* <p>This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()}
|
* <p>This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()}
|
||||||
* to start a new line of the comment. Note that this might produce unexpected results for formats that do not use
|
* to start a new line of the comment. Note that this might produce unexpected results for formats that do not use
|
||||||
* line breaks as record separator.</p>
|
* line breaks as record separators.</p>
|
||||||
*
|
*
|
||||||
* @param comment
|
* @param comment
|
||||||
* the comment to output
|
* the comment to output
|
||||||
|
@ -224,7 +224,7 @@ public final class CSVPrinter implements Flushable, Closeable {
|
||||||
/**
|
/**
|
||||||
* Prints headers for a result set based on its metadata.
|
* Prints headers for a result set based on its metadata.
|
||||||
*
|
*
|
||||||
* @param resultSet The result set to query for metadata.
|
* @param resultSet The ResultSet to query for metadata.
|
||||||
* @throws IOException If an I/O error occurs.
|
* @throws IOException If an I/O error occurs.
|
||||||
* @throws SQLException If a database access error occurs or this method is called on a closed result set.
|
* @throws SQLException If a database access error occurs or this method is called on a closed result set.
|
||||||
* @since 1.9.0
|
* @since 1.9.0
|
||||||
|
@ -245,7 +245,7 @@ public final class CSVPrinter implements Flushable, Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prints the given values as a single record of delimiter separated values followed by the record separator.
|
* Prints the given values as a single record of delimiter-separated values followed by the record separator.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
|
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
|
||||||
|
@ -265,7 +265,7 @@ public final class CSVPrinter implements Flushable, Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prints the given values as a single record of delimiter separated values followed by the record separator.
|
* Prints the given values as a single record of delimiter-separated values followed by the record separator.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
|
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
|
||||||
|
@ -282,7 +282,7 @@ public final class CSVPrinter implements Flushable, Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prints the given values as a single record of delimiter separated values followed by the record separator.
|
* Prints the given values as a single record of delimiter-separated values followed by the record separator.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
|
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
|
||||||
|
@ -366,7 +366,7 @@ public final class CSVPrinter implements Flushable, Closeable {
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* If the given array only contains simple objects, this method will print a single record like
|
* If the given array only contains simple objects, this method will print a single record like
|
||||||
* {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested
|
* {@link #printRecord(Object...)}. If the given collections contain nested collections or arrays, those nested
|
||||||
* elements will each be printed as records using {@link #printRecord(Object...)}.
|
* elements will each be printed as records using {@link #printRecord(Object...)}.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
|
@ -408,11 +408,11 @@ public final class CSVPrinter implements Flushable, Closeable {
|
||||||
* Prints all the objects in the given JDBC result set.
|
* Prints all the objects in the given JDBC result set.
|
||||||
*
|
*
|
||||||
* @param resultSet
|
* @param resultSet
|
||||||
* result set the values to print.
|
* The values to print.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* If an I/O error occurs
|
* If an I/O error occurs.
|
||||||
* @throws SQLException
|
* @throws SQLException
|
||||||
* if a database access error occurs
|
* Thrown when a database access error occurs.
|
||||||
*/
|
*/
|
||||||
public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
|
public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
|
||||||
final int columnCount = resultSet.getMetaData().getColumnCount();
|
final int columnCount = resultSet.getMetaData().getColumnCount();
|
||||||
|
|
|
@ -32,10 +32,10 @@ import java.util.stream.Stream;
|
||||||
* <p>
|
* <p>
|
||||||
* Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
|
* Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
|
||||||
* In version 1.8 the mapping between the column header and the column index was
|
* In version 1.8 the mapping between the column header and the column index was
|
||||||
* removed from the serialised state. The class maintains serialization compatibility
|
* removed from the serialized state. The class maintains serialization compatibility
|
||||||
* with versions pre-1.8 for the record values; these must be accessed by index
|
* with versions pre-1.8 for the record values; these must be accessed by index
|
||||||
* following deserialization. There will be loss of any functionally linked to the header
|
 * following deserialization. There will be a loss of any functionality linked to the header
|
||||||
* mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa.
|
* mapping when transferring serialized forms pre-1.8 to 1.8 and vice versa.
|
||||||
* </p>
|
* </p>
|
||||||
*/
|
*/
|
||||||
public final class CSVRecord implements Serializable, Iterable<String> {
|
public final class CSVRecord implements Serializable, Iterable<String> {
|
||||||
|
@ -143,8 +143,8 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
||||||
/**
|
/**
|
||||||
* Returns the comment for this record, if any.
|
* Returns the comment for this record, if any.
|
||||||
* Note that comments are attached to the following record.
|
* Note that comments are attached to the following record.
|
||||||
* If there is no following record (i.e. the comment is at EOF)
|
* If there is no following record (i.e. the comment is at EOF),
|
||||||
* the comment will be ignored.
|
* then the comment will be ignored.
|
||||||
*
|
*
|
||||||
* @return the comment for this record, or null if no comment for this record is available.
|
* @return the comment for this record, or null if no comment for this record is available.
|
||||||
*/
|
*/
|
||||||
|
@ -189,8 +189,8 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
||||||
/**
|
/**
|
||||||
* Checks whether this record has a comment, false otherwise.
|
* Checks whether this record has a comment, false otherwise.
|
||||||
* Note that comments are attached to the following record.
|
* Note that comments are attached to the following record.
|
||||||
* If there is no following record (i.e. the comment is at EOF)
|
* If there is no following record (i.e. the comment is at EOF),
|
||||||
* the comment will be ignored.
|
* then the comment will be ignored.
|
||||||
*
|
*
|
||||||
* @return true if this record has a comment, false otherwise
|
* @return true if this record has a comment, false otherwise
|
||||||
* @since 1.3
|
* @since 1.3
|
||||||
|
@ -227,22 +227,22 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks whether a column with given index has a value.
|
* Checks whether a column with a given index has a value.
|
||||||
*
|
*
|
||||||
* @param index
|
* @param index
|
||||||
* a column index (0-based)
|
* a column index (0-based)
|
||||||
* @return whether a column with given index has a value
|
* @return whether a column with a given index has a value
|
||||||
*/
|
*/
|
||||||
public boolean isSet(final int index) {
|
public boolean isSet(final int index) {
|
||||||
return 0 <= index && index < values.length;
|
return 0 <= index && index < values.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks whether a given columns is mapped and has a value.
|
* Checks whether a given column is mapped and has a value.
|
||||||
*
|
*
|
||||||
* @param name
|
* @param name
|
||||||
* the name of the column to be retrieved.
|
* the name of the column to be retrieved.
|
||||||
* @return whether a given columns is mapped and has a value
|
* @return whether a given column is mapped and has a value
|
||||||
*/
|
*/
|
||||||
public boolean isSet(final String name) {
|
public boolean isSet(final String name) {
|
||||||
return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
|
return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
|
||||||
|
@ -311,8 +311,8 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copies this record into a new Map of header name to record value. If multiple instances of a header name exists,
|
* Copies this record into a new Map of header name to record value. If multiple instances of a header name exist,
|
||||||
* only the last occurrence is mapped.
|
* then only the last occurrence is mapped.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* Editing the map does not update this instance.
|
* Editing the map does not update this instance.
|
||||||
|
|
|
@ -41,13 +41,13 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||||
/** The count of EOLs (CR/LF/CRLF) seen so far */
|
/** The count of EOLs (CR/LF/CRLF) seen so far */
|
||||||
private long eolCounter;
|
private long eolCounter;
|
||||||
|
|
||||||
/** The position, which is number of characters read so far */
|
/** The position, which is the number of characters read so far */
|
||||||
private long position;
|
private long position;
|
||||||
|
|
||||||
private boolean closed;
|
private boolean closed;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created extended buffered reader using default buffer-size
|
* Constructs a new instance using the default buffer size.
|
||||||
*/
|
*/
|
||||||
ExtendedBufferedReader(final Reader reader) {
|
ExtendedBufferedReader(final Reader reader) {
|
||||||
super(reader);
|
super(reader);
|
||||||
|
@ -107,7 +107,7 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
|
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
|
||||||
* still return this value. Does not affect line number or last character.
|
* still return this value. Does not affect the line number or the last character.
|
||||||
*
|
*
|
||||||
* @return the next character
|
* @return the next character
|
||||||
*
|
*
|
||||||
|
@ -125,7 +125,7 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||||
/**
|
/**
|
||||||
* Populates the buffer with the next {@code buf.length} characters in the
|
* Populates the buffer with the next {@code buf.length} characters in the
|
||||||
* current reader without consuming them. The next call to {@link #read()} will
|
* current reader without consuming them. The next call to {@link #read()} will
|
||||||
* still return the next value. This doesn't affect line number or last
|
* still return the next value. This doesn't affect the line number or the last
|
||||||
* character.
|
* character.
|
||||||
*
|
*
|
||||||
* @param buf the buffer to fill for the look ahead.
|
* @param buf the buffer to fill for the look ahead.
|
||||||
|
@ -199,7 +199,7 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the next line, dropping the line terminator(s). This method should only be called when processing a
|
* Gets the next line, dropping the line terminator(s). This method should only be called when processing a
|
||||||
* comment, otherwise information can be lost.
|
* comment, otherwise, information can be lost.
|
||||||
* <p>
|
* <p>
|
||||||
* Increments {@link #eolCounter} and updates {@link #position}.
|
* Increments {@link #eolCounter} and updates {@link #position}.
|
||||||
* </p>
|
* </p>
|
||||||
|
|
|
@ -42,7 +42,7 @@ final class Lexer implements Closeable {
|
||||||
private static final String LF_STRING = Character.toString(LF);
|
private static final String LF_STRING = Character.toString(LF);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it
|
* Constant char to use for disabling comments, escapes, and encapsulation. The value -2 is used because it
|
||||||
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
|
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
|
||||||
* chars (using surrogates) and thus there should never be a collision with a real text char.
|
* chars (using surrogates) and thus there should never be a collision with a real text char.
|
||||||
*/
|
*/
|
||||||
|
@ -146,9 +146,9 @@ final class Lexer implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests if the given character indicates end of file.
|
* Tests if the given character indicates the end of the file.
|
||||||
*
|
*
|
||||||
* @return true if the given character indicates end of file.
|
* @return true if the given character indicates the end of the file.
|
||||||
*/
|
*/
|
||||||
boolean isEndOfFile(final int ch) {
|
boolean isEndOfFile(final int ch) {
|
||||||
return ch == END_OF_STREAM;
|
return ch == END_OF_STREAM;
|
||||||
|
@ -168,7 +168,7 @@ final class Lexer implements Closeable {
|
||||||
*
|
*
|
||||||
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
|
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
|
||||||
*
|
*
|
||||||
* @return true if the next characters constitute a escape delimiter.
|
* @return true if the next characters constitute an escape delimiter.
|
||||||
* @throws IOException If an I/O error occurs.
|
* @throws IOException If an I/O error occurs.
|
||||||
*/
|
*/
|
||||||
boolean isEscapeDelimiter() throws IOException {
|
boolean isEscapeDelimiter() throws IOException {
|
||||||
|
@ -194,7 +194,7 @@ final class Lexer implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests if the current character represents the start of a line: a CR, LF or is at the start of the file.
|
* Tests if the current character represents the start of a line: a CR, LF, or is at the start of the file.
|
||||||
*
|
*
|
||||||
* @param ch the character to check
|
* @param ch the character to check
|
||||||
* @return true if the character is at the start of a line.
|
* @return true if the character is at the start of a line.
|
||||||
|
@ -214,13 +214,13 @@ final class Lexer implements Closeable {
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @param token
|
* @param token
|
||||||
* an existing Token object to reuse. The caller is responsible to initialize the Token.
|
* an existing Token object to reuse. The caller is responsible for initializing the Token.
|
||||||
* @return the next token found.
|
* @return the next token found.
|
||||||
* @throws IOException on stream access error.
|
* @throws IOException on stream access error.
|
||||||
*/
|
*/
|
||||||
Token nextToken(final Token token) throws IOException {
|
Token nextToken(final Token token) throws IOException {
|
||||||
|
|
||||||
// get the last read char (required for empty line detection)
|
// Get the last read char (required for empty line detection)
|
||||||
int lastChar = reader.getLastChar();
|
int lastChar = reader.getLastChar();
|
||||||
|
|
||||||
// read the next char and set eol
|
// read the next char and set eol
|
||||||
|
@ -234,11 +234,11 @@ final class Lexer implements Closeable {
|
||||||
// empty line detection: eol AND (last char was EOL or beginning)
|
// empty line detection: eol AND (last char was EOL or beginning)
|
||||||
if (ignoreEmptyLines) {
|
if (ignoreEmptyLines) {
|
||||||
while (eol && isStartOfLine(lastChar)) {
|
while (eol && isStartOfLine(lastChar)) {
|
||||||
// go on char ahead ...
|
                // Go one char ahead ...
|
||||||
lastChar = c;
|
lastChar = c;
|
||||||
c = reader.read();
|
c = reader.read();
|
||||||
eol = readEndOfLine(c);
|
eol = readEndOfLine(c);
|
||||||
// reached end of file without any content (empty line at the end)
|
// reached the end of the file without any content (empty line at the end)
|
||||||
if (isEndOfFile(c)) {
|
if (isEndOfFile(c)) {
|
||||||
token.type = EOF;
|
token.type = EOF;
|
||||||
// don't set token.isReady here because no content
|
// don't set token.isReady here because no content
|
||||||
|
@ -247,7 +247,7 @@ final class Lexer implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// did we reach eof during the last iteration already ? EOF
|
// Did we reach EOF during the last iteration already? EOF
|
||||||
if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
|
if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
|
||||||
token.type = EOF;
|
token.type = EOF;
|
||||||
// don't set token.isReady here because no content
|
// don't set token.isReady here because no content
|
||||||
|
@ -267,7 +267,7 @@ final class Lexer implements Closeable {
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
// important: make sure a new char gets consumed in each iteration
|
// Important: make sure a new char gets consumed in each iteration
|
||||||
while (token.type == INVALID) {
|
while (token.type == INVALID) {
|
||||||
// ignore whitespaces at beginning of a token
|
// ignore whitespaces at beginning of a token
|
||||||
if (ignoreSurroundingSpaces) {
|
if (ignoreSurroundingSpaces) {
|
||||||
|
@ -305,12 +305,12 @@ final class Lexer implements Closeable {
|
||||||
/**
|
/**
|
||||||
* Parses an encapsulated token.
|
* Parses an encapsulated token.
|
||||||
* <p>
|
* <p>
|
||||||
* Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included
|
* Encapsulated tokens are surrounded by the given encapsulating string. The encapsulator itself might be included
|
||||||
* in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
|
* in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
|
||||||
* an encapsulated token are ignored. The token is finished when one of the following conditions become true:
|
     * an encapsulated token are ignored. The token is finished when one of the following conditions becomes true:
|
||||||
* </p>
|
* </p>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:</li>
|
* <li>An unescaped encapsulator has been reached and is followed by optional whitespace then:</li>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>delimiter (TOKEN)</li>
|
* <li>delimiter (TOKEN)</li>
|
||||||
* <li>end of line (EORECORD)</li>
|
* <li>end of line (EORECORD)</li>
|
||||||
|
@ -321,11 +321,12 @@ final class Lexer implements Closeable {
|
||||||
* the current token
|
* the current token
|
||||||
* @return a valid token object
|
* @return a valid token object
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL
|
* Thrown when in an invalid state: EOF before closing encapsulator or invalid character before
|
||||||
|
* delimiter or EOL.
|
||||||
*/
|
*/
|
||||||
private Token parseEncapsulatedToken(final Token token) throws IOException {
|
private Token parseEncapsulatedToken(final Token token) throws IOException {
|
||||||
token.isQuoted = true;
|
token.isQuoted = true;
|
||||||
// save current line number in case needed for IOE
|
// Save current line number in case needed for IOE
|
||||||
final long startLineNumber = getCurrentLineNumber();
|
final long startLineNumber = getCurrentLineNumber();
|
||||||
int c;
|
int c;
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -385,13 +386,13 @@ final class Lexer implements Closeable {
|
||||||
/**
|
/**
|
||||||
* Parses a simple token.
|
* Parses a simple token.
|
||||||
* <p>
|
* <p>
|
||||||
* Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped
|
* Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped
|
||||||
* delimiters (as \, or \;). The token is finished when one of the following conditions become true:
|
* delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
|
||||||
* </p>
|
* </p>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>end of line has been reached (EORECORD)</li>
|
* <li>The end of line has been reached (EORECORD)</li>
|
||||||
* <li>end of stream has been reached (EOF)</li>
|
* <li>The end of stream has been reached (EOF)</li>
|
||||||
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
* <li>An unescaped delimiter has been reached (TOKEN)</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* @param token
|
* @param token
|
||||||
|
|
|
@ -32,7 +32,7 @@ public enum QuoteMode {
|
||||||
ALL_NON_NULL,
|
ALL_NON_NULL,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Quotes fields which contain special characters such as a the field delimiter, quote character or any of the
|
* Quotes fields that contain special characters such as a field delimiter, quote character, or any of the
|
||||||
* characters in the line separator string.
|
* characters in the line separator string.
|
||||||
*/
|
*/
|
||||||
MINIMAL,
|
MINIMAL,
|
||||||
|
|
|
@ -21,8 +21,9 @@ import static org.apache.commons.csv.Token.Type.INVALID;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Internal token representation.
|
* Internal token representation.
|
||||||
* <p/>
|
* <p>
|
||||||
* It is used as contract between the lexer and the parser.
|
* It is used as a contract between the lexer and the parser.
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
final class Token {
|
final class Token {
|
||||||
|
|
||||||
|
@ -30,7 +31,7 @@ final class Token {
|
||||||
/** Token has no valid content, i.e. is in its initialized state. */
|
/** Token has no valid content, i.e. is in its initialized state. */
|
||||||
INVALID,
|
INVALID,
|
||||||
|
|
||||||
/** Token with content, at beginning or in the middle of a line. */
|
/** Token with content, at the beginning or in the middle of a line. */
|
||||||
TOKEN,
|
TOKEN,
|
||||||
|
|
||||||
/** Token (which can have content) when the end of file is reached. */
|
/** Token (which can have content) when the end of file is reached. */
|
||||||
|
|
|
@ -18,14 +18,14 @@
|
||||||
/**
|
/**
|
||||||
* Apache Commons CSV Format Support.
|
* Apache Commons CSV Format Support.
|
||||||
*
|
*
|
||||||
* <p>CSV are widely used as interfaces to legacy systems or manual data-imports.
|
* <p>CSV are widely used as interfaces to legacy systems or manual data imports.
|
||||||
* CSV stands for "Comma Separated Values" (or sometimes "Character Separated
|
* CSV stands for "Comma Separated Values" (or sometimes "Character Separated
|
||||||
* Values"). The CSV data format is defined in
|
* Values"). The CSV data format is defined in
|
||||||
* <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a>
|
* <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a>
|
||||||
* but many dialects exist.</p>
|
* but many dialects exist.</p>
|
||||||
*
|
*
|
||||||
* <p>Common to all file dialects is its basic structure: The CSV data-format
|
* <p>Common to all file dialects is its basic structure: The CSV data-format
|
||||||
* is record oriented, whereas each record starts on a new textual line. A
|
* is record-oriented, whereas each record starts on a new textual line. A
|
||||||
 * record is built of a list of values. Keep in mind that not all records
|
 * record is built of a list of values. Keep in mind that not all records
|
||||||
* must have an equal number of values:</p>
|
* must have an equal number of values:</p>
|
||||||
* <pre>
|
* <pre>
|
||||||
|
@ -36,28 +36,28 @@
|
||||||
* <p>The following list contains the CSV aspects the Commons CSV parser supports:</p>
|
* <p>The following list contains the CSV aspects the Commons CSV parser supports:</p>
|
||||||
* <dl>
|
* <dl>
|
||||||
* <dt>Separators (for lines)</dt>
|
* <dt>Separators (for lines)</dt>
|
||||||
* <dd>The record separators are hardcoded and cannot be changed. The must be '\r', '\n' or '\r\n'.</dd>
|
 *   <dd>The record separators are hardcoded and cannot be changed. They must be '\r', '\n', or '\r\n'.</dd>
|
||||||
*
|
*
|
||||||
* <dt>Delimiter (for values)</dt>
|
* <dt>Delimiter (for values)</dt>
|
||||||
* <dd>The delimiter for values is freely configurable (default ',').</dd>
|
* <dd>The delimiter for values is freely configurable (default ',').</dd>
|
||||||
*
|
*
|
||||||
* <dt>Comments</dt>
|
* <dt>Comments</dt>
|
||||||
* <dd>Some CSV-dialects support a simple comment syntax. A comment is a record
|
* <dd>Some CSV dialects support a simple comment syntax. A comment is a record
|
||||||
* which must start with a designated character (the commentStarter). A record
|
* which must start with a designated character (the commentStarter). A record
|
||||||
* of this kind is treated as comment and gets removed from the input (default none)</dd>
|
* of this kind is treated as a comment and gets removed from the input (default none)</dd>
|
||||||
*
|
*
|
||||||
* <dt>Encapsulator</dt>
|
* <dt>Encapsulator</dt>
|
||||||
* <dd>Two encapsulator characters (default '"') are used to enclose -> complex values.</dd>
|
* <dd>Two encapsulator characters (default '"') are used to enclose -> complex values.</dd>
|
||||||
*
|
*
|
||||||
* <dt>Simple values</dt>
|
* <dt>Simple values</dt>
|
||||||
* <dd>A simple value consist of all characters (except the delimiter) until
|
* <dd>A simple value consists of all characters (except the delimiter) until
|
||||||
* (but not including) the next delimiter or a record-terminator. Optionally
|
* (but not including) the next delimiter or a record terminator. Optionally
|
||||||
* all surrounding whitespaces of a simple value can be ignored (default: true).</dd>
|
* all surrounding whitespaces of a simple value can be ignored (default: true).</dd>
|
||||||
*
|
*
|
||||||
* <dt>Complex values</dt>
|
* <dt>Complex values</dt>
|
||||||
* <dd>Complex values are encapsulated within a pair of the defined encapsulator characters.
|
* <dd>Complex values are encapsulated within a pair of the defined encapsulator characters.
|
||||||
* The encapsulator itself must be escaped or doubled when used inside complex values.
|
* The encapsulator itself must be escaped or doubled when used inside complex values.
|
||||||
* Complex values preserve all kind of formatting (including newlines -> multiline-values)</dd>
|
* Complex values preserve all kinds of formatting (including newlines -> multiline-values)</dd>
|
||||||
*
|
*
|
||||||
* <dt>Empty line skipping</dt>
|
* <dt>Empty line skipping</dt>
|
||||||
* <dd>Optionally empty lines in CSV files can be skipped.
|
* <dd>Optionally empty lines in CSV files can be skipped.
|
||||||
|
|
Loading…
Reference in New Issue