Merge branch 'apache:master' into CSV-147

This commit is contained in:
Buddhi De Silva 2023-08-30 19:23:35 +05:30 committed by GitHub
commit 156f1f5071
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 109 additions and 108 deletions

View File

@ -20,7 +20,7 @@
<parent>
<groupId>org.apache.commons</groupId>
<artifactId>commons-parent</artifactId>
<version>60</version>
<version>61</version>
</parent>
<artifactId>commons-csv</artifactId>
<version>1.10.1-SNAPSHOT</version>

View File

@ -50,9 +50,10 @@
<action type="fix" dev="ggregory" due-to="step-security-bot">[StepSecurity] CI: Harden GitHub Actions #329, #330.</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-io:commons-io: from 2.11.0 to 2.13.0.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-parent from 57 to 60.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-parent from 57 to 61.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump h2 from 2.1.214 to 2.2.220 #333.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-lang3 from 3.12.0 to 3.13.0.</action>
<action type="update" dev="ggregory" due-to="Buddhi De Silva, Michael Osipov, Gary Gregory">Update exception message in CSVRecord#getNextRecord() #348.</action>
</release>
<release version="1.10.0" date="2023-01-28" description="Feature and bug fix release (Java 8)">
<!-- FIX -->

View File

@ -477,7 +477,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
* </p>
*
* @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
* @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance.
*/
public Builder setHeader(final Class<? extends Enum<?>> headerEnum) {
@ -491,7 +491,7 @@ public final class CSVFormat implements Serializable {
}
/**
* Sets the header from the result set metadata. The header can either be parsed automatically from the input file with:
* Sets the header from the result set metadata. The header can be parsed automatically from the input file with:
*
* <pre>
* builder.setHeader();
@ -506,7 +506,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
* </p>
*
* @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
* @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance.
* @throws SQLException if a database access error occurs or this method is called on a closed result set.
*/
@ -515,7 +515,7 @@ public final class CSVFormat implements Serializable {
}
/**
* Sets the header from the result set metadata. The header can either be parsed automatically from the input file with:
* Sets the header from the result set metadata. The header can be parsed automatically from the input file with:
*
* <pre>
* builder.setHeader();
@ -530,7 +530,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
* </p>
*
* @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
* @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance.
* @throws SQLException if a database access error occurs or this method is called on a closed result set.
*/
@ -547,7 +547,7 @@ public final class CSVFormat implements Serializable {
}
/**
* Sets the header to the given values. The header can either be parsed automatically from the input file with:
* Sets the header to the given values. The header can be parsed automatically from the input file with:
*
* <pre>
* builder.setHeader();
@ -562,7 +562,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
* </p>
*
* @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
* @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance.
*/
public Builder setHeader(final String... header) {
@ -909,8 +909,8 @@ public final class CSVFormat implements Serializable {
false, false, false, DuplicateHeaderMode.ALLOW_ALL);
/**
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
* to customize this format to accommodate to your regional settings.
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent; it might be necessary
* to customize this format to accommodate your regional settings.
*
* <p>
* For example, for parsing or generating a CSV file on a French system, the following format will be used:
@ -949,7 +949,7 @@ public final class CSVFormat implements Serializable {
* Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
*
* <p>
* This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
* This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
* The default NULL string is {@code "\\N"}.
* </p>
*
@ -981,7 +981,7 @@ public final class CSVFormat implements Serializable {
* Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
*
* <p>
* This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
* This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
* The default NULL string is {@code "\\N"}.
* </p>
*
@ -1084,7 +1084,7 @@ public final class CSVFormat implements Serializable {
* Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
*
* <p>
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
* This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
* characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
* </p>
*
@ -1121,7 +1121,7 @@ public final class CSVFormat implements Serializable {
* Default Oracle format used by the SQL*Loader utility.
*
* <p>
* This is a comma-delimited format with the system line separator character as the record separator.Values are
* This is a comma-delimited format with the system line separator character as the record separator. Values are
* double quoted when needed and special characters are escaped with {@code '"'}. The default NULL string is
* {@code ""}. Values are trimmed.
* </p>
@ -1161,7 +1161,7 @@ public final class CSVFormat implements Serializable {
* Default PostgreSQL CSV format used by the {@code COPY} operation.
*
* <p>
* This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
* This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special
* characters are not escaped. The default NULL string is {@code ""}.
* </p>
*
@ -1199,7 +1199,7 @@ public final class CSVFormat implements Serializable {
* Default PostgreSQL text format used by the {@code COPY} operation.
*
* <p>
* This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
* This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
* characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
* </p>
*
@ -1890,7 +1890,7 @@ public final class CSVFormat implements Serializable {
}
/**
* Tests whether escape are being processed.
* Tests whether escapes are being processed.
*
* @return {@code true} if escapes are processed
*/
@ -1899,7 +1899,7 @@ public final class CSVFormat implements Serializable {
}
/**
* Tests whether a nullString has been defined.
* Tests whether a null string has been defined.
*
* @return {@code true} if a nullString is defined
*/
@ -2009,7 +2009,7 @@ public final class CSVFormat implements Serializable {
if (object == null) {
out.append(value);
} else if (isQuoteCharacterSet()) {
// the original object is needed so can check for Number
// The original object is needed so we can check for Number
printWithQuotes(object, value, out, newRecord);
} else if (isEscapeCharacterSet()) {
printWithEscapes(value, out);
@ -2086,7 +2086,7 @@ public final class CSVFormat implements Serializable {
}
/**
* Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the record separator.
* Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator.
*
* <p>
* The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing
@ -2241,7 +2241,7 @@ public final class CSVFormat implements Serializable {
return;
case MINIMAL:
if (len <= 0) {
// always quote an empty token that is the first
// Always quote an empty token that is the first
// on the line, as it may be the only thing on the
// line. If it were not quoted in that case,
// an empty line has no tokens.
@ -2279,7 +2279,7 @@ public final class CSVFormat implements Serializable {
}
if (!quote) {
// no encapsulation needed - write out the original value
// No encapsulation needed - write out the original value
out.append(charSeq, start, len);
return;
}
@ -2289,12 +2289,12 @@ public final class CSVFormat implements Serializable {
}
if (!quote) {
// no encapsulation needed - write out the original value
// No encapsulation needed - write out the original value
out.append(charSeq, start, len);
return;
}
// we hit something that needed encapsulation
// We hit something that needed encapsulation
out.append(quoteChar);
// Pick up where we left off: pos should be positioned on the first character that caused
@ -2310,13 +2310,13 @@ public final class CSVFormat implements Serializable {
pos++;
}
// write the last segment
// Write the last segment
out.append(charSeq, start, pos);
out.append(quoteChar);
}
/**
* Always use quotes unless QuoteMode is NONE, so we not have to look ahead.
* Always use quotes unless QuoteMode is NONE, so we do not have to look ahead.
*
* @param reader What to print
* @param appendable Where to print it
@ -2417,8 +2417,8 @@ public final class CSVFormat implements Serializable {
/**
* Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary.
* <p>
* Because an instance can be used for both writing an parsing, not all conditions can be tested here. For example allowMissingColumnNames is only used for
* parsing, so it cannot be used here.
* Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used
* for parsing, so it cannot be used here.
* </p>
*
* @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes.
@ -2570,7 +2570,7 @@ public final class CSVFormat implements Serializable {
* Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character.
*
* @param delimiter the delimiter character
* @return A new CSVFormat that is equal to this with the specified character as delimiter
* @return A new CSVFormat that is equal to this with the specified character as the delimiter
* @throws IllegalArgumentException thrown if the specified character is a line break
* @deprecated Use {@link Builder#setDelimiter(char)}
*/
@ -2679,7 +2679,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
* </p>
*
* @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
* @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return A new CSVFormat that is equal to this but with the specified header
* @throws SQLException if a database access error occurs or this method is called on a closed result set.
* @since 1.1
@ -2735,7 +2735,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
* </p>
*
* @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
* @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return A new CSVFormat that is equal to this but with the specified header
* @see Builder#setSkipHeaderRecord(boolean)
* @deprecated Use {@link Builder#setHeader(String...)}
@ -2793,7 +2793,7 @@ public final class CSVFormat implements Serializable {
/**
* Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
*
* @return A new CSVFormat that will ignore case header name.
* @return A new CSVFormat that will ignore the case of header names.
* @see Builder#setIgnoreHeaderCase(boolean)
* @since 1.3
* @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)}

View File

@ -53,7 +53,7 @@ import java.util.stream.StreamSupport;
* Because CSV appears in many different dialects, the parser supports many formats by allowing the
* specification of a {@link CSVFormat}.
*
* The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream.
* The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream.
*
* <h2>Creating instances</h2>
* <p>
@ -105,13 +105,13 @@ import java.util.stream.StreamSupport;
* </pre>
*
* <p>
* If the predefined formats don't match the format at hands, custom formats can be defined. More information about
* customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
* If the predefined formats don't match the format at hand, custom formats can be defined. More information about
* customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
* </p>
*
* <h2>Parsing into memory</h2>
* <p>
* If parsing record wise is not desired, the contents of the input can be read completely into memory.
* If parsing record-wise is not desired, the contents of the input can be read completely into memory.
* </p>
*
* <pre>
@ -126,14 +126,14 @@ import java.util.stream.StreamSupport;
*
* <ol>
* <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
* the input, those records will not end up in the in memory representation of your CSV data.</li>
* <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're
* the input, those records will not end up in the in-memory representation of your CSV data.</li>
* <li>Parsing into memory may consume a lot of system resources depending on the input. For example, if you're
* parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
* </ol>
*
* <h2>Notes</h2>
* <p>
* Internal parser state is completely covered by the format and the reader-state.
* The internal parser state is completely covered by the format and the reader state.
* </p>
*
* @see <a href="package-summary.html">package documentation for more details</a>
@ -147,7 +147,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
try {
return CSVParser.this.nextRecord();
} catch (final IOException e) {
throw new UncheckedIOException(e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
throw new UncheckedIOException("Exception reading next record: " + e.toString(), e);
}
}
@ -304,8 +304,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
return new CSVParser(reader, format);
}
// the following objects are shared to reduce garbage
/**
* Creates a parser for the given {@link String}.
*
@ -423,7 +421,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* @param recordNumber
* The next record number to assign
* @throws IllegalArgumentException
* If the parameters of the format are inconsistent or if either reader or format are null.
* If the parameters of the format are inconsistent or if either the reader or format is null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @since 1.1
@ -702,11 +700,11 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
}
/**
* Handle whether input is parsed as null
* Handles whether the input is parsed as null
*
* @param input
* the cell data to be further processed
* @return null if input is parsed as null, or input itself if input isn't parsed as null
* @return null if input is parsed as null, or input itself if the input isn't parsed as null
*/
private String handleNull(final String input) {
final boolean isQuoted = this.reusableToken.isQuoted;
@ -773,7 +771,7 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
* Returns the record iterator.
*
* <p>
* An {@link IOException} caught during the iteration are re-thrown as an
* An {@link IOException} caught during the iteration is re-thrown as an
* {@link IllegalStateException}.
* </p>
* <p>

View File

@ -171,7 +171,7 @@ public final class CSVPrinter implements Flushable, Closeable {
}
/**
* Prints a comment on a new line among the delimiter separated values.
* Prints a comment on a new line among the delimiter-separated values.
*
* <p>
* Comments will always begin on a new line and occupy at least one full line. The character specified to start
@ -184,7 +184,7 @@ public final class CSVPrinter implements Flushable, Closeable {
*
* <p>This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()}
* to start a new line of the comment. Note that this might produce unexpected results for formats that do not use
* line breaks as record separator.</p>
* line breaks as record separators.</p>
*
* @param comment
* the comment to output
@ -224,7 +224,7 @@ public final class CSVPrinter implements Flushable, Closeable {
/**
* Prints headers for a result set based on its metadata.
*
* @param resultSet The result set to query for metadata.
* @param resultSet The ResultSet to query for metadata.
* @throws IOException If an I/O error occurs.
* @throws SQLException If a database access error occurs or this method is called on a closed result set.
* @since 1.9.0
@ -245,7 +245,7 @@ public final class CSVPrinter implements Flushable, Closeable {
}
/**
* Prints the given values as a single record of delimiter separated values followed by the record separator.
* Prints the given values as a single record of delimiter-separated values followed by the record separator.
*
* <p>
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
@ -265,7 +265,7 @@ public final class CSVPrinter implements Flushable, Closeable {
}
/**
* Prints the given values as a single record of delimiter separated values followed by the record separator.
* Prints the given values as a single record of delimiter-separated values followed by the record separator.
*
* <p>
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
@ -282,7 +282,7 @@ public final class CSVPrinter implements Flushable, Closeable {
}
/**
* Prints the given values as a single record of delimiter separated values followed by the record separator.
* Prints the given values as a single record of delimiter-separated values followed by the record separator.
*
* <p>
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
@ -366,7 +366,7 @@ public final class CSVPrinter implements Flushable, Closeable {
*
* <p>
* If the given array only contains simple objects, this method will print a single record like
* {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested
* {@link #printRecord(Object...)}. If the given collections contain nested collections or arrays, those nested
* elements will each be printed as records using {@link #printRecord(Object...)}.
* </p>
*
@ -408,11 +408,11 @@ public final class CSVPrinter implements Flushable, Closeable {
* Prints all the objects in the given JDBC result set.
*
* @param resultSet
* result set the values to print.
* The values to print.
* @throws IOException
* If an I/O error occurs
* If an I/O error occurs.
* @throws SQLException
* if a database access error occurs
* Thrown when a database access error occurs.
*/
public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
final int columnCount = resultSet.getMetaData().getColumnCount();

View File

@ -32,10 +32,10 @@ import java.util.stream.Stream;
* <p>
* Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
* In version 1.8 the mapping between the column header and the column index was
* removed from the serialised state. The class maintains serialization compatibility
* removed from the serialized state. The class maintains serialization compatibility
* with versions pre-1.8 for the record values; these must be accessed by index
* following deserialization. There will be loss of any functionally linked to the header
* mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa.
* following deserialization. There will be a loss of any functionality linked to the header
* mapping when transferring serialized forms pre-1.8 to 1.8 and vice versa.
* </p>
*/
public final class CSVRecord implements Serializable, Iterable<String> {
@ -143,8 +143,8 @@ public final class CSVRecord implements Serializable, Iterable<String> {
/**
* Returns the comment for this record, if any.
* Note that comments are attached to the following record.
* If there is no following record (i.e. the comment is at EOF)
* the comment will be ignored.
* If there is no following record (i.e. the comment is at EOF),
* then the comment will be ignored.
*
* @return the comment for this record, or null if no comment for this record is available.
*/
@ -189,8 +189,8 @@ public final class CSVRecord implements Serializable, Iterable<String> {
/**
* Checks whether this record has a comment, false otherwise.
* Note that comments are attached to the following record.
* If there is no following record (i.e. the comment is at EOF)
* the comment will be ignored.
* If there is no following record (i.e. the comment is at EOF),
* then the comment will be ignored.
*
* @return true if this record has a comment, false otherwise
* @since 1.3
@ -227,22 +227,22 @@ public final class CSVRecord implements Serializable, Iterable<String> {
}
/**
* Checks whether a column with given index has a value.
* Checks whether a column with a given index has a value.
*
* @param index
* a column index (0-based)
* @return whether a column with given index has a value
* @return whether a column with a given index has a value
*/
public boolean isSet(final int index) {
    // A negative index can never address a stored value.
    if (index < 0) {
        return false;
    }
    // Otherwise the column has a value only when the index lies inside the values array.
    return index < values.length;
}
/**
* Checks whether a given columns is mapped and has a value.
* Checks whether a given column is mapped and has a value.
*
* @param name
* the name of the column to be retrieved.
* @return whether a given columns is mapped and has a value
* @return whether a given column is mapped and has a value
*/
public boolean isSet(final String name) {
return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
@ -311,8 +311,8 @@ public final class CSVRecord implements Serializable, Iterable<String> {
}
/**
* Copies this record into a new Map of header name to record value. If multiple instances of a header name exists,
* only the last occurrence is mapped.
* Copies this record into a new Map of header name to record value. If multiple instances of a header name exist,
* then only the last occurrence is mapped.
*
* <p>
* Editing the map does not update this instance.

View File

@ -41,13 +41,13 @@ final class ExtendedBufferedReader extends BufferedReader {
/** The count of EOLs (CR/LF/CRLF) seen so far */
private long eolCounter;
/** The position, which is number of characters read so far */
/** The position, which is the number of characters read so far */
private long position;
private boolean closed;
/**
* Created extended buffered reader using default buffer-size
* Constructs a new instance using the default buffer size.
*/
ExtendedBufferedReader(final Reader reader) {
super(reader);
@ -107,7 +107,7 @@ final class ExtendedBufferedReader extends BufferedReader {
/**
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
* still return this value. Does not affect line number or last character.
* still return this value. Does not affect the line number or the last character.
*
* @return the next character
*
@ -125,7 +125,7 @@ final class ExtendedBufferedReader extends BufferedReader {
/**
* Populates the buffer with the next {@code buf.length} characters in the
* current reader without consuming them. The next call to {@link #read()} will
* still return the next value. This doesn't affect line number or last
* still return the next value. This doesn't affect the line number or the last
* character.
*
* @param buf the buffer to fill for the look ahead.
@ -199,7 +199,7 @@ final class ExtendedBufferedReader extends BufferedReader {
/**
* Gets the next line, dropping the line terminator(s). This method should only be called when processing a
* comment, otherwise information can be lost.
* comment; otherwise, information can be lost.
* <p>
* Increments {@link #eolCounter} and updates {@link #position}.
* </p>

View File

@ -42,7 +42,7 @@ final class Lexer implements Closeable {
private static final String LF_STRING = Character.toString(LF);
/**
* Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it
* Constant char to use for disabling comments, escapes, and encapsulation. The value -2 is used because it
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
* chars (using surrogates) and thus there should never be a collision with a real text char.
*/
@ -146,9 +146,9 @@ final class Lexer implements Closeable {
}
/**
* Tests if the given character indicates end of file.
* Tests if the given character indicates the end of the file.
*
* @return true if the given character indicates end of file.
* @return true if the given character indicates the end of the file.
*/
boolean isEndOfFile(final int ch) {
return ch == END_OF_STREAM;
@ -168,7 +168,7 @@ final class Lexer implements Closeable {
*
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
*
* @return true if the next characters constitute a escape delimiter.
* @return true if the next characters constitute an escape delimiter.
* @throws IOException If an I/O error occurs.
*/
boolean isEscapeDelimiter() throws IOException {
@ -194,7 +194,7 @@ final class Lexer implements Closeable {
}
/**
* Tests if the current character represents the start of a line: a CR, LF or is at the start of the file.
* Tests if the current character represents the start of a line: a CR, LF, or is at the start of the file.
*
* @param ch the character to check
* @return true if the character is at the start of a line.
@ -214,13 +214,13 @@ final class Lexer implements Closeable {
* </p>
*
* @param token
* an existing Token object to reuse. The caller is responsible to initialize the Token.
* an existing Token object to reuse. The caller is responsible for initializing the Token.
* @return the next token found.
* @throws IOException on stream access error.
*/
Token nextToken(final Token token) throws IOException {
// get the last read char (required for empty line detection)
// Get the last read char (required for empty line detection)
int lastChar = reader.getLastChar();
// read the next char and set eol
@ -234,11 +234,11 @@ final class Lexer implements Closeable {
// empty line detection: eol AND (last char was EOL or beginning)
if (ignoreEmptyLines) {
while (eol && isStartOfLine(lastChar)) {
// go on char ahead ...
// Go on char ahead ...
lastChar = c;
c = reader.read();
eol = readEndOfLine(c);
// reached end of file without any content (empty line at the end)
// reached the end of the file without any content (empty line at the end)
if (isEndOfFile(c)) {
token.type = EOF;
// don't set token.isReady here because no content
@ -247,7 +247,7 @@ final class Lexer implements Closeable {
}
}
// did we reach eof during the last iteration already ? EOF
// Did we reach EOF during the last iteration already? EOF
if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
token.type = EOF;
// don't set token.isReady here because no content
@ -267,7 +267,7 @@ final class Lexer implements Closeable {
return token;
}
// important: make sure a new char gets consumed in each iteration
// Important: make sure a new char gets consumed in each iteration
while (token.type == INVALID) {
// ignore whitespaces at beginning of a token
if (ignoreSurroundingSpaces) {
@ -305,12 +305,12 @@ final class Lexer implements Closeable {
/**
* Parses an encapsulated token.
* <p>
* Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included
* Encapsulated tokens are surrounded by the given encapsulating string. The encapsulator itself might be included
* in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
* an encapsulated token are ignored. The token is finished when one of the following conditions become true:
* an encapsulated token are ignored. The token is finished when one of the following conditions becomes true:
* </p>
* <ul>
* <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:</li>
* <li>An unescaped encapsulator has been reached and is followed by optional whitespace then:</li>
* <ul>
* <li>delimiter (TOKEN)</li>
* <li>end of line (EORECORD)</li>
@ -321,11 +321,12 @@ final class Lexer implements Closeable {
* the current token
* @return a valid token object
* @throws IOException
* on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL
* Thrown when in an invalid state: EOF before closing encapsulator or invalid character before
* delimiter or EOL.
*/
private Token parseEncapsulatedToken(final Token token) throws IOException {
token.isQuoted = true;
// save current line number in case needed for IOE
// Save current line number in case needed for IOE
final long startLineNumber = getCurrentLineNumber();
int c;
while (true) {
@ -385,13 +386,13 @@ final class Lexer implements Closeable {
/**
* Parses a simple token.
* <p>
* Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped
* delimiters (as \, or \;). The token is finished when one of the following conditions become true:
* Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped
* delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
* </p>
* <ul>
* <li>end of line has been reached (EORECORD)</li>
* <li>end of stream has been reached (EOF)</li>
* <li>an unescaped delimiter has been reached (TOKEN)</li>
* <li>The end of line has been reached (EORECORD)</li>
* <li>The end of stream has been reached (EOF)</li>
* <li>An unescaped delimiter has been reached (TOKEN)</li>
* </ul>
*
* @param token

View File

@ -32,7 +32,7 @@ public enum QuoteMode {
ALL_NON_NULL,
/**
* Quotes fields which contain special characters such as a the field delimiter, quote character or any of the
* Quotes fields that contain special characters such as a field delimiter, quote character, or any of the
* characters in the line separator string.
*/
MINIMAL,

View File

@ -21,8 +21,9 @@ import static org.apache.commons.csv.Token.Type.INVALID;
/**
* Internal token representation.
* <p/>
* It is used as contract between the lexer and the parser.
* <p>
* It is used as a contract between the lexer and the parser.
* </p>
*/
final class Token {
@ -30,7 +31,7 @@ final class Token {
/** Token has no valid content, i.e. is in its initialized state. */
INVALID,
/** Token with content, at beginning or in the middle of a line. */
/** Token with content, at the beginning or in the middle of a line. */
TOKEN,
/** Token (which can have content) when the end of file is reached. */

View File

@ -18,14 +18,14 @@
/**
* Apache Commons CSV Format Support.
*
* <p>CSV are widely used as interfaces to legacy systems or manual data-imports.
* <p>CSV files are widely used as interfaces to legacy systems or manual data imports.
* CSV stands for "Comma Separated Values" (or sometimes "Character Separated
* Values"). The CSV data format is defined in
* <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a>
* but many dialects exist.</p>
*
* <p>Common to all file dialects is its basic structure: The CSV data-format
* is record oriented, whereas each record starts on a new textual line. A
* is record-oriented, where each record starts on a new textual line. A
* record is built from a list of values. Keep in mind that not all records
* must have an equal number of values:</p>
* <pre>
@ -36,28 +36,28 @@
* <p>The following list contains the CSV aspects the Commons CSV parser supports:</p>
* <dl>
* <dt>Separators (for lines)</dt>
* <dd>The record separators are hardcoded and cannot be changed. The must be '\r', '\n' or '\r\n'.</dd>
* <dd>The record separators are hardcoded and cannot be changed. They must be '\r', '\n', or '\r\n'.</dd>
*
* <dt>Delimiter (for values)</dt>
* <dd>The delimiter for values is freely configurable (default ',').</dd>
*
* <dt>Comments</dt>
* <dd>Some CSV-dialects support a simple comment syntax. A comment is a record
* <dd>Some CSV dialects support a simple comment syntax. A comment is a record
* which must start with a designated character (the commentStarter). A record
* of this kind is treated as comment and gets removed from the input (default none)</dd>
* of this kind is treated as a comment and gets removed from the input (default none).</dd>
*
* <dt>Encapsulator</dt>
* <dd>Two encapsulator characters (default '"') are used to enclose -&gt; complex values.</dd>
*
* <dt>Simple values</dt>
* <dd>A simple value consist of all characters (except the delimiter) until
* (but not including) the next delimiter or a record-terminator. Optionally
* <dd>A simple value consists of all characters (except the delimiter) until
* (but not including) the next delimiter or a record terminator. Optionally
* all surrounding whitespaces of a simple value can be ignored (default: true).</dd>
*
* <dt>Complex values</dt>
* <dd>Complex values are encapsulated within a pair of the defined encapsulator characters.
* The encapsulator itself must be escaped or doubled when used inside complex values.
* Complex values preserve all kind of formatting (including newlines -&gt; multiline-values)</dd>
* Complex values preserve all kinds of formatting (including newlines -&gt; multiline-values)</dd>
*
* <dt>Empty line skipping</dt>
* <dd>Optionally empty lines in CSV files can be skipped.