Merge branch 'apache:master' into CSV-147

This commit is contained in:
Buddhi De Silva 2023-08-30 19:23:35 +05:30 committed by GitHub
commit 156f1f5071
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 109 additions and 108 deletions

View File

@ -20,7 +20,7 @@
<parent> <parent>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>
<artifactId>commons-parent</artifactId> <artifactId>commons-parent</artifactId>
<version>60</version> <version>61</version>
</parent> </parent>
<artifactId>commons-csv</artifactId> <artifactId>commons-csv</artifactId>
<version>1.10.1-SNAPSHOT</version> <version>1.10.1-SNAPSHOT</version>

View File

@ -50,9 +50,10 @@
<action type="fix" dev="ggregory" due-to="step-security-bot">[StepSecurity] CI: Harden GitHub Actions #329, #330.</action> <action type="fix" dev="ggregory" due-to="step-security-bot">[StepSecurity] CI: Harden GitHub Actions #329, #330.</action>
<!-- UPDATE --> <!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-io:commons-io: from 2.11.0 to 2.13.0.</action> <action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-io:commons-io: from 2.11.0 to 2.13.0.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-parent from 57 to 60.</action> <action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-parent from 57 to 61.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump h2 from 2.1.214 to 2.2.220 #333.</action> <action type="update" dev="ggregory" due-to="Dependabot">Bump h2 from 2.1.214 to 2.2.220 #333.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-lang3 from 3.12.0 to 3.13.0.</action> <action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-lang3 from 3.12.0 to 3.13.0.</action>
<action type="update" dev="ggregory" due-to="Buddhi De Silva, Michael Osipov, Gary Gregory">Update exception message in CSVRecord#getNextRecord() #348.</action>
</release> </release>
<release version="1.10.0" date="2023-01-28" description="Feature and bug fix release (Java 8)"> <release version="1.10.0" date="2023-01-28" description="Feature and bug fix release (Java 8)">
<!-- FIX --> <!-- FIX -->

View File

@ -477,7 +477,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}. * The header is also used by the {@link CSVPrinter}.
* </p> * </p>
* *
* @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance. * @return This instance.
*/ */
public Builder setHeader(final Class<? extends Enum<?>> headerEnum) { public Builder setHeader(final Class<? extends Enum<?>> headerEnum) {
@ -491,7 +491,7 @@ public final class CSVFormat implements Serializable {
} }
/** /**
* Sets the header from the result set metadata. The header can either be parsed automatically from the input file with: * Sets the header from the result set metadata. The header can be parsed automatically from the input file with:
* *
* <pre> * <pre>
* builder.setHeader(); * builder.setHeader();
@ -506,7 +506,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}. * The header is also used by the {@link CSVPrinter}.
* </p> * </p>
* *
* @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance. * @return This instance.
* @throws SQLException if a database access error occurs or this method is called on a closed result set. * @throws SQLException if a database access error occurs or this method is called on a closed result set.
*/ */
@ -515,7 +515,7 @@ public final class CSVFormat implements Serializable {
} }
/** /**
* Sets the header from the result set metadata. The header can either be parsed automatically from the input file with: * Sets the header from the result set metadata. The header can be parsed automatically from the input file with:
* *
* <pre> * <pre>
* builder.setHeader(); * builder.setHeader();
@ -530,7 +530,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}. * The header is also used by the {@link CSVPrinter}.
* </p> * </p>
* *
* @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance. * @return This instance.
* @throws SQLException if a database access error occurs or this method is called on a closed result set. * @throws SQLException if a database access error occurs or this method is called on a closed result set.
*/ */
@ -547,7 +547,7 @@ public final class CSVFormat implements Serializable {
} }
/** /**
* Sets the header to the given values. The header can either be parsed automatically from the input file with: * Sets the header to the given values. The header can be parsed automatically from the input file with:
* *
* <pre> * <pre>
* builder.setHeader(); * builder.setHeader();
@ -562,7 +562,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}. * The header is also used by the {@link CSVPrinter}.
* </p> * </p>
* *
* @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance. * @return This instance.
*/ */
public Builder setHeader(final String... header) { public Builder setHeader(final String... header) {
@ -909,8 +909,8 @@ public final class CSVFormat implements Serializable {
false, false, false, DuplicateHeaderMode.ALLOW_ALL); false, false, false, DuplicateHeaderMode.ALLOW_ALL);
/** /**
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent; it might be necessary
* to customize this format to accommodate to your regional settings. * to customize this format to accommodate your regional settings.
* *
* <p> * <p>
* For example for parsing or generating a CSV file on a French system the following format will be used: * For example for parsing or generating a CSV file on a French system the following format will be used:
@ -949,7 +949,7 @@ public final class CSVFormat implements Serializable {
* Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation. * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
* *
* <p> * <p>
* This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
* The default NULL string is {@code "\\N"}. * The default NULL string is {@code "\\N"}.
* </p> * </p>
* *
@ -981,7 +981,7 @@ public final class CSVFormat implements Serializable {
* Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.) * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
* *
* <p> * <p>
* This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}. * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
* The default NULL string is {@code "\\N"}. * The default NULL string is {@code "\\N"}.
* </p> * </p>
* *
@ -1084,7 +1084,7 @@ public final class CSVFormat implements Serializable {
* Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations. * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
* *
* <p> * <p>
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
* characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
* </p> * </p>
* *
@ -1121,7 +1121,7 @@ public final class CSVFormat implements Serializable {
* Default Oracle format used by the SQL*Loader utility. * Default Oracle format used by the SQL*Loader utility.
* *
* <p> * <p>
* This is a comma-delimited format with the system line separator character as the record separator.Values are * This is a comma-delimited format with the system line separator character as the record separator. Values are
* double quoted when needed and special characters are escaped with {@code '"'}. The default NULL string is * double quoted when needed and special characters are escaped with {@code '"'}. The default NULL string is
* {@code ""}. Values are trimmed. * {@code ""}. Values are trimmed.
* </p> * </p>
@ -1161,7 +1161,7 @@ public final class CSVFormat implements Serializable {
* Default PostgreSQL CSV format used by the {@code COPY} operation. * Default PostgreSQL CSV format used by the {@code COPY} operation.
* *
* <p> * <p>
* This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special * This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special
* characters are not escaped. The default NULL string is {@code ""}. * characters are not escaped. The default NULL string is {@code ""}.
* </p> * </p>
* *
@ -1199,7 +1199,7 @@ public final class CSVFormat implements Serializable {
* Default PostgreSQL text format used by the {@code COPY} operation. * Default PostgreSQL text format used by the {@code COPY} operation.
* *
* <p> * <p>
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
* characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}. * characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
* </p> * </p>
* *
@ -1890,7 +1890,7 @@ public final class CSVFormat implements Serializable {
} }
/** /**
* Tests whether escape are being processed. * Tests whether escapes are being processed.
* *
* @return {@code true} if escapes are processed * @return {@code true} if escapes are processed
*/ */
@ -1899,7 +1899,7 @@ public final class CSVFormat implements Serializable {
} }
/** /**
* Tests whether a nullString has been defined. * Tests whether a null string has been defined.
* *
* @return {@code true} if a nullString is defined * @return {@code true} if a nullString is defined
*/ */
@ -2009,7 +2009,7 @@ public final class CSVFormat implements Serializable {
if (object == null) { if (object == null) {
out.append(value); out.append(value);
} else if (isQuoteCharacterSet()) { } else if (isQuoteCharacterSet()) {
// the original object is needed so can check for Number // The original object is needed so we can check for Number
printWithQuotes(object, value, out, newRecord); printWithQuotes(object, value, out, newRecord);
} else if (isEscapeCharacterSet()) { } else if (isEscapeCharacterSet()) {
printWithEscapes(value, out); printWithEscapes(value, out);
@ -2086,7 +2086,7 @@ public final class CSVFormat implements Serializable {
} }
/** /**
* Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the record separator. * Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator.
* *
* <p> * <p>
* The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing
@ -2241,7 +2241,7 @@ public final class CSVFormat implements Serializable {
return; return;
case MINIMAL: case MINIMAL:
if (len <= 0) { if (len <= 0) {
// always quote an empty token that is the first // Always quote an empty token that is the first
// on the line, as it may be the only thing on the // on the line, as it may be the only thing on the
// line. If it were not quoted in that case, // line. If it were not quoted in that case,
// an empty line has no tokens. // an empty line has no tokens.
@ -2279,7 +2279,7 @@ public final class CSVFormat implements Serializable {
} }
if (!quote) { if (!quote) {
// no encapsulation needed - write out the original value // No encapsulation needed - write out the original value
out.append(charSeq, start, len); out.append(charSeq, start, len);
return; return;
} }
@ -2289,12 +2289,12 @@ public final class CSVFormat implements Serializable {
} }
if (!quote) { if (!quote) {
// no encapsulation needed - write out the original value // No encapsulation needed - write out the original value
out.append(charSeq, start, len); out.append(charSeq, start, len);
return; return;
} }
// we hit something that needed encapsulation // We hit something that needed encapsulation
out.append(quoteChar); out.append(quoteChar);
// Pick up where we left off: pos should be positioned on the first character that caused // Pick up where we left off: pos should be positioned on the first character that caused
@ -2310,13 +2310,13 @@ public final class CSVFormat implements Serializable {
pos++; pos++;
} }
// write the last segment // Write the last segment
out.append(charSeq, start, pos); out.append(charSeq, start, pos);
out.append(quoteChar); out.append(quoteChar);
} }
/** /**
* Always use quotes unless QuoteMode is NONE, so we not have to look ahead. * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead.
* *
* @param reader What to print * @param reader What to print
* @param appendable Where to print it * @param appendable Where to print it
@ -2417,8 +2417,8 @@ public final class CSVFormat implements Serializable {
/** /**
* Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary. * Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary.
* <p> * <p>
* Because an instance can be used for both writing an parsing, not all conditions can be tested here. For example allowMissingColumnNames is only used for * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used
* parsing, so it cannot be used here. * for parsing, so it cannot be used here.
* </p> * </p>
* *
* @throws IllegalArgumentException Thrown when any attribute is invalid or inconsistent with other attributes. * @throws IllegalArgumentException Thrown when any attribute is invalid or inconsistent with other attributes.
@ -2570,7 +2570,7 @@ public final class CSVFormat implements Serializable {
* Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character. * Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character.
* *
* @param delimiter the delimiter character * @param delimiter the delimiter character
* @return A new CSVFormat that is equal to this with the specified character as delimiter * @return A new CSVFormat that is equal to this with the specified character as a delimiter
* @throws IllegalArgumentException thrown if the specified character is a line break * @throws IllegalArgumentException thrown if the specified character is a line break
* @deprecated Use {@link Builder#setDelimiter(char)} * @deprecated Use {@link Builder#setDelimiter(char)}
*/ */
@ -2679,7 +2679,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}. * The header is also used by the {@link CSVPrinter}.
* </p> * </p>
* *
* @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return A new CSVFormat that is equal to this but with the specified header * @return A new CSVFormat that is equal to this but with the specified header
* @throws SQLException if a database access error occurs or this method is called on a closed result set. * @throws SQLException if a database access error occurs or this method is called on a closed result set.
* @since 1.1 * @since 1.1
@ -2735,7 +2735,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}. * The header is also used by the {@link CSVPrinter}.
* </p> * </p>
* *
* @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return A new CSVFormat that is equal to this but with the specified header * @return A new CSVFormat that is equal to this but with the specified header
* @see Builder#setSkipHeaderRecord(boolean) * @see Builder#setSkipHeaderRecord(boolean)
* @deprecated Use {@link Builder#setHeader(String...)} * @deprecated Use {@link Builder#setHeader(String...)}
@ -2793,7 +2793,7 @@ public final class CSVFormat implements Serializable {
/** /**
* Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. * Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
* *
* @return A new CSVFormat that will ignore case header name. * @return A new CSVFormat that will ignore the case of header names.
* @see Builder#setIgnoreHeaderCase(boolean) * @see Builder#setIgnoreHeaderCase(boolean)
* @since 1.3 * @since 1.3
* @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)} * @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)}

View File

@ -53,7 +53,7 @@ import java.util.stream.StreamSupport;
* Because CSV appears in many different dialects, the parser supports many formats by allowing the * Because CSV appears in many different dialects, the parser supports many formats by allowing the
* specification of a {@link CSVFormat}. * specification of a {@link CSVFormat}.
* *
* The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream. * The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream.
* *
* <h2>Creating instances</h2> * <h2>Creating instances</h2>
* <p> * <p>
@ -105,13 +105,13 @@ import java.util.stream.StreamSupport;
* </pre> * </pre>
* *
* <p> * <p>
* If the predefined formats don't match the format at hands, custom formats can be defined. More information about * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
* customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}. * customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
* </p> * </p>
* *
* <h2>Parsing into memory</h2> * <h2>Parsing into memory</h2>
* <p> * <p>
* If parsing record wise is not desired, the contents of the input can be read completely into memory. * If parsing record-wise is not desired, the contents of the input can be read completely into memory.
* </p> * </p>
* *
* <pre> * <pre>
@ -126,14 +126,14 @@ import java.util.stream.StreamSupport;
* *
* <ol> * <ol>
* <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from * <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
* the input, those records will not end up in the in memory representation of your CSV data.</li> * the input, those records will not end up in the in-memory representation of your CSV data.</li>
* <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're * <li>Parsing into memory may consume a lot of system resources depending on the input. For example, if you're
* parsing a 150MB file of CSV data the contents will be read completely into memory.</li> * parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
* </ol> * </ol>
* *
* <h2>Notes</h2> * <h2>Notes</h2>
* <p> * <p>
* Internal parser state is completely covered by the format and the reader-state. * The internal parser state is completely covered by the format and the reader state.
* </p> * </p>
* *
* @see <a href="package-summary.html">package documentation for more details</a> * @see <a href="package-summary.html">package documentation for more details</a>
@ -147,7 +147,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
try { try {
return CSVParser.this.nextRecord(); return CSVParser.this.nextRecord();
} catch (final IOException e) { } catch (final IOException e) {
throw new UncheckedIOException(e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); throw new UncheckedIOException("Exception reading next record: " + e.toString(), e);
} }
} }
@ -304,8 +304,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
return new CSVParser(reader, format); return new CSVParser(reader, format);
} }
// the following objects are shared to reduce garbage
/** /**
* Creates a parser for the given {@link String}. * Creates a parser for the given {@link String}.
* *
@ -423,7 +421,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* @param recordNumber * @param recordNumber
* The next record number to assign * The next record number to assign
* @throws IllegalArgumentException * @throws IllegalArgumentException
* If the parameters of the format are inconsistent or if either reader or format are null. * If the parameters of the format are inconsistent or if either the reader or format is null.
* @throws IOException * @throws IOException
* If there is a problem reading the header or skipping the first record * If there is a problem reading the header or skipping the first record
* @since 1.1 * @since 1.1
@ -702,11 +700,11 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
} }
/** /**
* Handle whether input is parsed as null * Handles whether the input is parsed as null
* *
* @param input * @param input
* the cell data to be further processed * the cell data to be further processed
* @return null if input is parsed as null, or input itself if input isn't parsed as null * @return null if input is parsed as null, or input itself if the input isn't parsed as null
*/ */
private String handleNull(final String input) { private String handleNull(final String input) {
final boolean isQuoted = this.reusableToken.isQuoted; final boolean isQuoted = this.reusableToken.isQuoted;
@ -773,7 +771,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* Returns the record iterator. * Returns the record iterator.
* *
* <p> * <p>
* An {@link IOException} caught during the iteration are re-thrown as an * An {@link IOException} caught during the iteration is re-thrown as an
* {@link UncheckedIOException}. * {@link UncheckedIOException}.
* </p> * </p>
* <p> * <p>

View File

@ -171,7 +171,7 @@ public final class CSVPrinter implements Flushable, Closeable {
} }
/** /**
* Prints a comment on a new line among the delimiter separated values. * Prints a comment on a new line among the delimiter-separated values.
* *
* <p> * <p>
* Comments will always begin on a new line and occupy at least one full line. The character specified to start * Comments will always begin on a new line and occupy at least one full line. The character specified to start
@ -184,7 +184,7 @@ public final class CSVPrinter implements Flushable, Closeable {
* *
* <p>This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()} * <p>This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()}
* to start a new line of the comment. Note that this might produce unexpected results for formats that do not use * to start a new line of the comment. Note that this might produce unexpected results for formats that do not use
* line breaks as record separator.</p> * line breaks as record separators.</p>
* *
* @param comment * @param comment
* the comment to output * the comment to output
@ -224,7 +224,7 @@ public final class CSVPrinter implements Flushable, Closeable {
/** /**
* Prints headers for a result set based on its metadata. * Prints headers for a result set based on its metadata.
* *
* @param resultSet The result set to query for metadata. * @param resultSet The ResultSet to query for metadata.
* @throws IOException If an I/O error occurs. * @throws IOException If an I/O error occurs.
* @throws SQLException If a database access error occurs or this method is called on a closed result set. * @throws SQLException If a database access error occurs or this method is called on a closed result set.
* @since 1.9.0 * @since 1.9.0
@ -245,7 +245,7 @@ public final class CSVPrinter implements Flushable, Closeable {
} }
/** /**
* Prints the given values as a single record of delimiter separated values followed by the record separator. * Prints the given values as a single record of delimiter-separated values followed by the record separator.
* *
* <p> * <p>
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
@ -265,7 +265,7 @@ public final class CSVPrinter implements Flushable, Closeable {
} }
/** /**
* Prints the given values as a single record of delimiter separated values followed by the record separator. * Prints the given values as a single record of delimiter-separated values followed by the record separator.
* *
* <p> * <p>
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
@ -282,7 +282,7 @@ public final class CSVPrinter implements Flushable, Closeable {
} }
/** /**
* Prints the given values as a single record of delimiter separated values followed by the record separator. * Prints the given values as a single record of delimiter-separated values followed by the record separator.
* *
* <p> * <p>
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
@ -366,7 +366,7 @@ public final class CSVPrinter implements Flushable, Closeable {
* *
* <p> * <p>
* If the given array only contains simple objects, this method will print a single record like * If the given array only contains simple objects, this method will print a single record like
* {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested * {@link #printRecord(Object...)}. If the given collections contain nested collections or arrays, those nested
* elements will each be printed as records using {@link #printRecord(Object...)}. * elements will each be printed as records using {@link #printRecord(Object...)}.
* </p> * </p>
* *
@ -408,11 +408,11 @@ public final class CSVPrinter implements Flushable, Closeable {
* Prints all the objects in the given JDBC result set. * Prints all the objects in the given JDBC result set.
* *
* @param resultSet * @param resultSet
* result set the values to print. * The values to print.
* @throws IOException * @throws IOException
* If an I/O error occurs * If an I/O error occurs.
* @throws SQLException * @throws SQLException
* if a database access error occurs * Thrown when a database access error occurs.
*/ */
public void printRecords(final ResultSet resultSet) throws SQLException, IOException { public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
final int columnCount = resultSet.getMetaData().getColumnCount(); final int columnCount = resultSet.getMetaData().getColumnCount();

View File

@ -32,10 +32,10 @@ import java.util.stream.Stream;
* <p> * <p>
* Note: Support for {@link Serializable} is scheduled to be removed in version 2.0. * Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
* In version 1.8 the mapping between the column header and the column index was * In version 1.8 the mapping between the column header and the column index was
* removed from the serialised state. The class maintains serialization compatibility * removed from the serialized state. The class maintains serialization compatibility
* with versions pre-1.8 for the record values; these must be accessed by index * with versions pre-1.8 for the record values; these must be accessed by index
* following deserialization. There will be loss of any functionally linked to the header * following deserialization. There will be a loss of any functionality linked to the header
* mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa. * mapping when transferring serialized forms pre-1.8 to 1.8 and vice versa.
* </p> * </p>
*/ */
public final class CSVRecord implements Serializable, Iterable<String> { public final class CSVRecord implements Serializable, Iterable<String> {
@ -143,8 +143,8 @@ public final class CSVRecord implements Serializable, Iterable<String> {
/** /**
* Returns the comment for this record, if any. * Returns the comment for this record, if any.
* Note that comments are attached to the following record. * Note that comments are attached to the following record.
* If there is no following record (i.e. the comment is at EOF) * If there is no following record (i.e. the comment is at EOF),
* the comment will be ignored. * then the comment will be ignored.
* *
* @return the comment for this record, or null if no comment for this record is available. * @return the comment for this record, or null if no comment for this record is available.
*/ */
@ -189,8 +189,8 @@ public final class CSVRecord implements Serializable, Iterable<String> {
/** /**
* Checks whether this record has a comment, false otherwise. * Checks whether this record has a comment, false otherwise.
* Note that comments are attached to the following record. * Note that comments are attached to the following record.
* If there is no following record (i.e. the comment is at EOF) * If there is no following record (i.e. the comment is at EOF),
* the comment will be ignored. * then the comment will be ignored.
* *
* @return true if this record has a comment, false otherwise * @return true if this record has a comment, false otherwise
* @since 1.3 * @since 1.3
@ -227,22 +227,22 @@ public final class CSVRecord implements Serializable, Iterable<String> {
} }
/** /**
* Checks whether a column with given index has a value. * Checks whether a column with a given index has a value.
* *
* @param index * @param index
* a column index (0-based) * a column index (0-based)
* @return whether a column with given index has a value * @return whether a column with a given index has a value
*/ */
public boolean isSet(final int index) { public boolean isSet(final int index) {
return 0 <= index && index < values.length; return 0 <= index && index < values.length;
} }
/** /**
* Checks whether a given columns is mapped and has a value. * Checks whether a given column is mapped and has a value.
* *
* @param name * @param name
* the name of the column to be retrieved. * the name of the column to be retrieved.
* @return whether a given columns is mapped and has a value * @return whether a given column is mapped and has a value
*/ */
public boolean isSet(final String name) { public boolean isSet(final String name) {
return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
@ -311,8 +311,8 @@ public final class CSVRecord implements Serializable, Iterable<String> {
} }
/** /**
* Copies this record into a new Map of header name to record value. If multiple instances of a header name exists, * Copies this record into a new Map of header name to record value. If multiple instances of a header name exist,
* only the last occurrence is mapped. * then only the last occurrence is mapped.
* *
* <p> * <p>
* Editing the map does not update this instance. * Editing the map does not update this instance.

View File

@ -41,13 +41,13 @@ final class ExtendedBufferedReader extends BufferedReader {
/** The count of EOLs (CR/LF/CRLF) seen so far */ /** The count of EOLs (CR/LF/CRLF) seen so far */
private long eolCounter; private long eolCounter;
/** The position, which is number of characters read so far */ /** The position, which is the number of characters read so far */
private long position; private long position;
private boolean closed; private boolean closed;
/** /**
* Created extended buffered reader using default buffer-size * Constructs a new instance using the default buffer size.
*/ */
ExtendedBufferedReader(final Reader reader) { ExtendedBufferedReader(final Reader reader) {
super(reader); super(reader);
@ -107,7 +107,7 @@ final class ExtendedBufferedReader extends BufferedReader {
/** /**
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
* still return this value. Does not affect line number or last character. * still return this value. Does not affect the line number or the last character.
* *
* @return the next character * @return the next character
* *
@ -125,7 +125,7 @@ final class ExtendedBufferedReader extends BufferedReader {
/** /**
* Populates the buffer with the next {@code buf.length} characters in the * Populates the buffer with the next {@code buf.length} characters in the
* current reader without consuming them. The next call to {@link #read()} will * current reader without consuming them. The next call to {@link #read()} will
* still return the next value. This doesn't affect line number or last * still return the next value. This doesn't affect the line number or the last
* character. * character.
* *
* @param buf the buffer to fill for the look ahead. * @param buf the buffer to fill for the look ahead.
@ -199,7 +199,7 @@ final class ExtendedBufferedReader extends BufferedReader {
/** /**
* Gets the next line, dropping the line terminator(s). This method should only be called when processing a * Gets the next line, dropping the line terminator(s). This method should only be called when processing a
* comment, otherwise information can be lost. * comment, otherwise, information can be lost.
* <p> * <p>
* Increments {@link #eolCounter} and updates {@link #position}. * Increments {@link #eolCounter} and updates {@link #position}.
* </p> * </p>

View File

@ -42,7 +42,7 @@ final class Lexer implements Closeable {
private static final String LF_STRING = Character.toString(LF); private static final String LF_STRING = Character.toString(LF);
/** /**
* Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it * Constant char to use for disabling comments, escapes, and encapsulation. The value -2 is used because it
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two * won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
* chars (using surrogates) and thus there should never be a collision with a real text char. * chars (using surrogates) and thus there should never be a collision with a real text char.
*/ */
@ -146,9 +146,9 @@ final class Lexer implements Closeable {
} }
/** /**
* Tests if the given character indicates end of file. * Tests if the given character indicates the end of the file.
* *
* @return true if the given character indicates end of file. * @return true if the given character indicates the end of the file.
*/ */
boolean isEndOfFile(final int ch) { boolean isEndOfFile(final int ch) {
return ch == END_OF_STREAM; return ch == END_OF_STREAM;
@ -168,7 +168,7 @@ final class Lexer implements Closeable {
* *
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]". * For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
* *
* @return true if the next characters constitute a escape delimiter. * @return true if the next characters constitute an escape delimiter.
* @throws IOException If an I/O error occurs. * @throws IOException If an I/O error occurs.
*/ */
boolean isEscapeDelimiter() throws IOException { boolean isEscapeDelimiter() throws IOException {
@ -194,7 +194,7 @@ final class Lexer implements Closeable {
} }
/** /**
* Tests if the current character represents the start of a line: a CR, LF or is at the start of the file. * Tests if the current character represents the start of a line: a CR, LF, or is at the start of the file.
* *
* @param ch the character to check * @param ch the character to check
* @return true if the character is at the start of a line. * @return true if the character is at the start of a line.
@ -214,13 +214,13 @@ final class Lexer implements Closeable {
* </p> * </p>
* *
* @param token * @param token
* an existing Token object to reuse. The caller is responsible to initialize the Token. * an existing Token object to reuse. The caller is responsible for initializing the Token.
* @return the next token found. * @return the next token found.
* @throws IOException on stream access error. * @throws IOException on stream access error.
*/ */
Token nextToken(final Token token) throws IOException { Token nextToken(final Token token) throws IOException {
// get the last read char (required for empty line detection) // Get the last read char (required for empty line detection)
int lastChar = reader.getLastChar(); int lastChar = reader.getLastChar();
// read the next char and set eol // read the next char and set eol
@ -234,11 +234,11 @@ final class Lexer implements Closeable {
// empty line detection: eol AND (last char was EOL or beginning) // empty line detection: eol AND (last char was EOL or beginning)
if (ignoreEmptyLines) { if (ignoreEmptyLines) {
while (eol && isStartOfLine(lastChar)) { while (eol && isStartOfLine(lastChar)) {
// go one char ahead ... // Go one char ahead ...
lastChar = c; lastChar = c;
c = reader.read(); c = reader.read();
eol = readEndOfLine(c); eol = readEndOfLine(c);
// reached end of file without any content (empty line at the end) // reached the end of the file without any content (empty line at the end)
if (isEndOfFile(c)) { if (isEndOfFile(c)) {
token.type = EOF; token.type = EOF;
// don't set token.isReady here because no content // don't set token.isReady here because no content
@ -247,7 +247,7 @@ final class Lexer implements Closeable {
} }
} }
// did we reach eof during the last iteration already ? EOF // Did we reach EOF during the last iteration already? EOF
if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) { if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
token.type = EOF; token.type = EOF;
// don't set token.isReady here because no content // don't set token.isReady here because no content
@ -267,7 +267,7 @@ final class Lexer implements Closeable {
return token; return token;
} }
// important: make sure a new char gets consumed in each iteration // Important: make sure a new char gets consumed in each iteration
while (token.type == INVALID) { while (token.type == INVALID) {
// ignore whitespaces at beginning of a token // ignore whitespaces at beginning of a token
if (ignoreSurroundingSpaces) { if (ignoreSurroundingSpaces) {
@ -305,12 +305,12 @@ final class Lexer implements Closeable {
/** /**
* Parses an encapsulated token. * Parses an encapsulated token.
* <p> * <p>
* Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included * Encapsulated tokens are surrounded by the given encapsulating string. The encapsulator itself might be included
* in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after * in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
* an encapsulated token are ignored. The token is finished when one of the following conditions become true: * an encapsulated token are ignored. The token is finished when one of the following conditions becomes true:
* </p> * </p>
* <ul> * <ul>
* <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:</li> * <li>An unescaped encapsulator has been reached and is followed by optional whitespace then:</li>
* <ul> * <ul>
* <li>delimiter (TOKEN)</li> * <li>delimiter (TOKEN)</li>
* <li>end of line (EORECORD)</li> * <li>end of line (EORECORD)</li>
@ -321,11 +321,12 @@ final class Lexer implements Closeable {
* the current token * the current token
* @return a valid token object * @return a valid token object
* @throws IOException * @throws IOException
* on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL * Thrown when in an invalid state: EOF before closing encapsulator or invalid character before
* delimiter or EOL.
*/ */
private Token parseEncapsulatedToken(final Token token) throws IOException { private Token parseEncapsulatedToken(final Token token) throws IOException {
token.isQuoted = true; token.isQuoted = true;
// save current line number in case needed for IOE // Save current line number in case needed for IOE
final long startLineNumber = getCurrentLineNumber(); final long startLineNumber = getCurrentLineNumber();
int c; int c;
while (true) { while (true) {
@ -385,13 +386,13 @@ final class Lexer implements Closeable {
/** /**
* Parses a simple token. * Parses a simple token.
* <p> * <p>
* Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped * Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped
* delimiters (as \, or \;). The token is finished when one of the following conditions become true: * delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
* </p> * </p>
* <ul> * <ul>
* <li>end of line has been reached (EORECORD)</li> * <li>The end of line has been reached (EORECORD)</li>
* <li>end of stream has been reached (EOF)</li> * <li>The end of stream has been reached (EOF)</li>
* <li>an unescaped delimiter has been reached (TOKEN)</li> * <li>An unescaped delimiter has been reached (TOKEN)</li>
* </ul> * </ul>
* *
* @param token * @param token

View File

@ -32,7 +32,7 @@ public enum QuoteMode {
ALL_NON_NULL, ALL_NON_NULL,
/** /**
* Quotes fields which contain special characters such as a the field delimiter, quote character or any of the * Quotes fields that contain special characters such as a field delimiter, quote character, or any of the
* characters in the line separator string. * characters in the line separator string.
*/ */
MINIMAL, MINIMAL,

View File

@ -21,8 +21,9 @@ import static org.apache.commons.csv.Token.Type.INVALID;
/** /**
* Internal token representation. * Internal token representation.
* <p/> * <p>
* It is used as contract between the lexer and the parser. * It is used as a contract between the lexer and the parser.
* </p>
*/ */
final class Token { final class Token {
@ -30,7 +31,7 @@ final class Token {
/** Token has no valid content, i.e. is in its initialized state. */ /** Token has no valid content, i.e. is in its initialized state. */
INVALID, INVALID,
/** Token with content, at beginning or in the middle of a line. */ /** Token with content, at the beginning or in the middle of a line. */
TOKEN, TOKEN,
/** Token (which can have content) when the end of file is reached. */ /** Token (which can have content) when the end of file is reached. */

View File

@ -18,14 +18,14 @@
/** /**
* Apache Commons CSV Format Support. * Apache Commons CSV Format Support.
* *
* <p>CSV are widely used as interfaces to legacy systems or manual data-imports. * <p>CSV are widely used as interfaces to legacy systems or manual data imports.
* CSV stands for "Comma Separated Values" (or sometimes "Character Separated * CSV stands for "Comma Separated Values" (or sometimes "Character Separated
* Values"). The CSV data format is defined in * Values"). The CSV data format is defined in
* <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> * <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a>
* but many dialects exist.</p> * but many dialects exist.</p>
* *
* <p>Common to all file dialects is its basic structure: The CSV data-format * <p>Common to all file dialects is its basic structure: The CSV data-format
* is record oriented, whereas each record starts on a new textual line. A * is record-oriented, whereas each record starts on a new textual line. A
* record is built from a list of values. Keep in mind that not all records * record is built from a list of values. Keep in mind that not all records
* must have an equal number of values:</p> * must have an equal number of values:</p>
* <pre> * <pre>
@ -36,28 +36,28 @@
* <p>The following list contains the CSV aspects the Commons CSV parser supports:</p> * <p>The following list contains the CSV aspects the Commons CSV parser supports:</p>
* <dl> * <dl>
* <dt>Separators (for lines)</dt> * <dt>Separators (for lines)</dt>
* <dd>The record separators are hardcoded and cannot be changed. They must be '\r', '\n' or '\r\n'.</dd> * <dd>The record separators are hardcoded and cannot be changed. They must be '\r', '\n', or '\r\n'.</dd>
* *
* <dt>Delimiter (for values)</dt> * <dt>Delimiter (for values)</dt>
* <dd>The delimiter for values is freely configurable (default ',').</dd> * <dd>The delimiter for values is freely configurable (default ',').</dd>
* *
* <dt>Comments</dt> * <dt>Comments</dt>
* <dd>Some CSV-dialects support a simple comment syntax. A comment is a record * <dd>Some CSV dialects support a simple comment syntax. A comment is a record
* which must start with a designated character (the commentStarter). A record * which must start with a designated character (the commentStarter). A record
* of this kind is treated as comment and gets removed from the input (default none)</dd> * of this kind is treated as a comment and gets removed from the input (default none)</dd>
* *
* <dt>Encapsulator</dt> * <dt>Encapsulator</dt>
* <dd>Two encapsulator characters (default '"') are used to enclose -&gt; complex values.</dd> * <dd>Two encapsulator characters (default '"') are used to enclose -&gt; complex values.</dd>
* *
* <dt>Simple values</dt> * <dt>Simple values</dt>
* <dd>A simple value consist of all characters (except the delimiter) until * <dd>A simple value consists of all characters (except the delimiter) until
* (but not including) the next delimiter or a record-terminator. Optionally * (but not including) the next delimiter or a record terminator. Optionally
* all surrounding whitespaces of a simple value can be ignored (default: true).</dd> * all surrounding whitespaces of a simple value can be ignored (default: true).</dd>
* *
* <dt>Complex values</dt> * <dt>Complex values</dt>
* <dd>Complex values are encapsulated within a pair of the defined encapsulator characters. * <dd>Complex values are encapsulated within a pair of the defined encapsulator characters.
* The encapsulator itself must be escaped or doubled when used inside complex values. * The encapsulator itself must be escaped or doubled when used inside complex values.
* Complex values preserve all kind of formatting (including newlines -&gt; multiline-values)</dd> * Complex values preserve all kinds of formatting (including newlines -&gt; multiline-values)</dd>
* *
* <dt>Empty line skipping</dt> * <dt>Empty line skipping</dt>
* <dd>Optionally empty lines in CSV files can be skipped. * <dd>Optionally empty lines in CSV files can be skipped.