diff --git a/src/test/resources/org/apache/commons/csv/CSVFormat.java b/src/test/resources/org/apache/commons/csv/CSVFormat.java deleted file mode 100644 index c00f993f..00000000 --- a/src/test/resources/org/apache/commons/csv/CSVFormat.java +++ /dev/null @@ -1,2330 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.BACKSLASH; -import static org.apache.commons.csv.Constants.COMMA; -import static org.apache.commons.csv.Constants.COMMENT; -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.CRLF; -import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR; -import static org.apache.commons.csv.Constants.EMPTY; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.PIPE; -import static org.apache.commons.csv.Constants.SP; -import static org.apache.commons.csv.Constants.TAB; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Reader; -import java.io.Serializable; -import java.io.StringWriter; -import java.io.Writer; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Path; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; - -/** - * Specifies the format of a CSV file and parses input. - * - *

Using predefined formats

- * - *

- * You can use one of the predefined formats: - *

- * - * - * - *

- * For example: - *

- * - *
- * CSVParser parser = CSVFormat.EXCEL.parse(reader);
- * 
- * - *

- * The {@link CSVParser} provides static methods to parse other input types, for example: - *

- * - *
- * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
- * 
- * - *

Defining formats

- * - *

- * You can extend a format by calling the {@code with} methods. For example: - *

- * - *
- * CSVFormat.EXCEL.withNullString("N/A").withIgnoreSurroundingSpaces(true);
- * 
- * - *

Defining column names

- * - *

- * To define the column names you want to use to access records, write: - *

- * - *
- * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3");
- * 
- * - *

- * Calling {@link #withHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and - * assumes that your CSV source does not contain a first record that also defines column names. - * - * If it does, then you are overriding this metadata with your names and you should skip the first record by calling - * {@link #withSkipHeaderRecord(boolean)} with {@code true}. - *

- * - *

Parsing

- * - *

- * You can use a format directly to parse a reader. For example, to parse an Excel file with a column header, write: - *

- * - *
- * Reader in = ...;
- * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3").parse(in);
- * 
- * - *

- * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}. - *

- * - *

Referencing columns safely

- * - *

- * If your source contains a header record, you can simplify your code and safely reference columns, by using - * {@link #withHeader(String...)} with no arguments: - *

- * - *
- * CSVFormat.EXCEL.withHeader();
- * 
- * - *

- * This causes the parser to read the first record and use its values as column names. - * - * Then, call one of the {@link CSVRecord} get methods that take a String column name argument: - *

- * - *
- * String value = record.get("Col1");
- * 
- * - *

- * This makes your code impervious to changes in column order in the CSV file. - *

- * - *

Notes

- * - *

- * This class is immutable. - *

- */ -public final class CSVFormat implements Serializable { - - /** - * Predefines formats. - * - * @since 1.2 - */ - public enum Predefined { - - /** - * @see CSVFormat#DEFAULT - */ - Default(CSVFormat.DEFAULT), - - /** - * @see CSVFormat#EXCEL - */ - Excel(CSVFormat.EXCEL), - - /** - * @see CSVFormat#INFORMIX_UNLOAD - * @since 1.3 - */ - InformixUnload(CSVFormat.INFORMIX_UNLOAD), - - /** - * @see CSVFormat#INFORMIX_UNLOAD_CSV - * @since 1.3 - */ - InformixUnloadCsv(CSVFormat.INFORMIX_UNLOAD_CSV), - - /** - * @see CSVFormat#MONGODB_CSV - * @since 1.7 - */ - MongoDBCsv(CSVFormat.MONGODB_CSV), - - /** - * @see CSVFormat#MONGODB_TSV - * @since 1.7 - */ - MongoDBTsv(CSVFormat.MONGODB_TSV), - - /** - * @see CSVFormat#MYSQL - */ - MySQL(CSVFormat.MYSQL), - - /** - * @see CSVFormat#ORACLE - */ - Oracle(CSVFormat.ORACLE), - - /** - * @see CSVFormat#POSTGRESQL_CSV - * @since 1.5 - */ - PostgreSQLCsv(CSVFormat.POSTGRESQL_CSV), - - /** - * @see CSVFormat#POSTGRESQL_TEXT - */ - PostgreSQLText(CSVFormat.POSTGRESQL_TEXT), - - /** - * @see CSVFormat#RFC4180 - */ - RFC4180(CSVFormat.RFC4180), - - /** - * @see CSVFormat#TDF - */ - TDF(CSVFormat.TDF); - - private final CSVFormat format; - - Predefined(final CSVFormat format) { - this.format = format; - } - - /** - * Gets the format. - * - * @return the format. - */ - public CSVFormat getFormat() { - return format; - } - } - - /** - * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines. - * - *

- * Settings are: - *

- * - * - * @see Predefined#Default - */ - public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, - null, null, null, false, false, false, false, false, false, true); - - /** - * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is - * locale dependent, it might be necessary to customize this format to accommodate to your regional settings. - * - *

- * For example for parsing or generating a CSV file on a French system the following format will be used: - *

- * - *
-     * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
-     * 
- * - *

- * Settings are: - *

- * - *

- * Note: This is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean) - * withAllowMissingColumnNames(true)} and {@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}. - *

- * - * @see Predefined#Excel - */ - // @formatter:off - public static final CSVFormat EXCEL = DEFAULT - .withIgnoreEmptyLines(false) - .withAllowMissingColumnNames(); - // @formatter:on - - /** - * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation. - * - *

- * This is a pipe-delimited format with a LF character as the line separator. Values are not quoted and special - * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. - *

- * - *

- * Settings are: - *

- * - * - * @see Predefined#InformixUnload - * @see - * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm - * @since 1.3 - */ - // @formatter:off - public static final CSVFormat INFORMIX_UNLOAD = DEFAULT - .withDelimiter(PIPE) - .withEscape(BACKSLASH) - .withQuote(DOUBLE_QUOTE_CHAR) - .withRecordSeparator(LF); - // @formatter:on - - /** - * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.) - * - *

- * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special - * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. - *

- * - *

- * Settings are: - *

- * - * - * @see Predefined#InformixUnloadCsv - * @see - * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm - * @since 1.3 - */ - // @formatter:off - public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT - .withDelimiter(COMMA) - .withQuote(DOUBLE_QUOTE_CHAR) - .withRecordSeparator(LF); - // @formatter:on - - /** - * Default MongoDB CSV format used by the {@code mongoexport} operation. - *

- * Parsing is not supported yet. - *

- * - *

- * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with - * {@code '"'}. A header line with field names is expected. - *

- * - *

- * Settings are: - *

- * - * - * @see Predefined#MongoDBCsv - * @see MongoDB mongoexport command - * documentation - * @since 1.7 - */ - // @formatter:off - public static final CSVFormat MONGODB_CSV = DEFAULT - .withDelimiter(COMMA) - .withEscape(DOUBLE_QUOTE_CHAR) - .withQuote(DOUBLE_QUOTE_CHAR) - .withQuoteMode(QuoteMode.MINIMAL) - .withSkipHeaderRecord(false); - // @formatter:on - - /** - * Default MongoDB TSV format used by the {@code mongoexport} operation. - *

- * Parsing is not supported yet. - *

- * - *

- * This is a tab-delimited format. Values are double quoted only if needed and special - * characters are escaped with {@code '"'}. A header line with field names is expected. - *

- * - *

- * Settings are: - *

- * - * - * @see Predefined#MongoDBTsv - * @see MongoDB mongoexport command - * documentation - * @since 1.7 - */ - // @formatter:off - public static final CSVFormat MONGODB_TSV = DEFAULT - .withDelimiter(TAB) - .withEscape(DOUBLE_QUOTE_CHAR) - .withQuote(DOUBLE_QUOTE_CHAR) - .withQuoteMode(QuoteMode.MINIMAL) - .withSkipHeaderRecord(false); - // @formatter:on - - /** - * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations. - * - *

- * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special - * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. - *

- * - *

- * Settings are: - *

- * - * - * @see Predefined#MySQL - * @see http://dev.mysql.com/doc/refman/5.1/en/load-data.html - */ - // @formatter:off - public static final CSVFormat MYSQL = DEFAULT - .withDelimiter(TAB) - .withEscape(BACKSLASH) - .withIgnoreEmptyLines(false) - .withQuote(null) - .withRecordSeparator(LF) - .withNullString("\\N") - .withQuoteMode(QuoteMode.ALL_NON_NULL); - // @formatter:on - - /** - * Default Oracle format used by the SQL*Loader utility. - * - *

- * This is a comma-delimited format with the system line separator character as the record separator. Values are - * double quoted when needed and special characters are escaped with {@code '\'}. The default NULL string is - * {@code "\\N"}. Values are trimmed. - *

- * - *

- * Settings are: - *

- * - * - * @see Predefined#Oracle - * @see Oracle CSV Format Specification - * @since 1.6 - */ - // @formatter:off - public static final CSVFormat ORACLE = DEFAULT - .withDelimiter(COMMA) - .withEscape(BACKSLASH) - .withIgnoreEmptyLines(false) - .withQuote(DOUBLE_QUOTE_CHAR) - .withNullString("\\N") - .withTrim() - .withSystemRecordSeparator() - .withQuoteMode(QuoteMode.MINIMAL); - // @formatter:on - - /** - * Default PostgreSQL CSV format used by the {@code COPY} operation. - * - *

- * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special - * characters are escaped with {@code '"'}. The default NULL string is {@code ""}. - *

- * - *

- * Settings are: - *

- * - * - * @see Predefined#PostgreSQLCsv - * @see PostgreSQL COPY command - * documentation - * @since 1.5 - */ - // @formatter:off - public static final CSVFormat POSTGRESQL_CSV = DEFAULT - .withDelimiter(COMMA) - .withEscape(DOUBLE_QUOTE_CHAR) - .withIgnoreEmptyLines(false) - .withQuote(DOUBLE_QUOTE_CHAR) - .withRecordSeparator(LF) - .withNullString(EMPTY) - .withQuoteMode(QuoteMode.ALL_NON_NULL); - // @formatter:on - - /** - * Default PostgreSQL text format used by the {@code COPY} operation. - * - *

- * This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special - * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. - *

- * - *

- * Settings are: - *

- * - * - * @see Predefined#PostgreSQLText - * @see PostgreSQL COPY command - * documentation - * @since 1.5 - */ - // @formatter:off - public static final CSVFormat POSTGRESQL_TEXT = DEFAULT - .withDelimiter(TAB) - .withEscape(BACKSLASH) - .withIgnoreEmptyLines(false) - .withQuote(DOUBLE_QUOTE_CHAR) - .withRecordSeparator(LF) - .withNullString("\\N") - .withQuoteMode(QuoteMode.ALL_NON_NULL); - // @formatter:on - - /** - * Comma separated format as defined by RFC 4180. - * - *

- * Settings are: - *

- * - * - * @see Predefined#RFC4180 - */ - public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false); - - private static final long serialVersionUID = 1L; - - /** - * Tab-delimited format. - * - *

- * Settings are: - *

- * - * - * @see Predefined#TDF - */ - // @formatter:off - public static final CSVFormat TDF = DEFAULT - .withDelimiter(TAB) - .withIgnoreSurroundingSpaces(); - // @formatter:on - - /** - * Returns true if the given character is a line break character. - * - * @param c - * the character to check - * - * @return true if {@code c} is a line break character - */ - private static boolean isLineBreak(final char c) { - return c == LF || c == CR; - } - - /** - * Returns true if the given character is a line break character. - * - * @param c - * the character to check, may be null - * - * @return true if {@code c} is a line break character (and not null) - */ - private static boolean isLineBreak(final Character c) { - return c != null && isLineBreak(c.charValue()); - } - - /** - * Creates a new CSV format with the specified delimiter. - * - *

- * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized - * with null/false, except that duplicate header names are allowed. - *

- * - * @param delimiter - * the char used for value separation, must not be a line break character - * @return a new CSV format. - * @throws IllegalArgumentException - * if the delimiter is a line break character - * - * @see #DEFAULT - * @see #RFC4180 - * @see #MYSQL - * @see #EXCEL - * @see #TDF - */ - public static CSVFormat newFormat(final char delimiter) { - return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false, - false, false, false, false, true); - } - - /** - * Gets one of the predefined formats from {@link CSVFormat.Predefined}. - * - * @param format - * name - * @return one of the predefined formats - * @since 1.2 - */ - public static CSVFormat valueOf(final String format) { - return CSVFormat.Predefined.valueOf(format).getFormat(); - } - - private final boolean allowDuplicateHeaderNames; - - private final boolean allowMissingColumnNames; - - private final boolean autoFlush; - - private final Character commentMarker; // null if commenting is disabled - - private final char delimiter; - - private final Character escapeCharacter; // null if escaping is disabled - - private final String[] header; // array of header column names - - private final String[] headerComments; // array of header comment lines - - private final boolean ignoreEmptyLines; - - private final boolean ignoreHeaderCase; // should ignore header names case - - private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? - - private final String nullString; // the string to be used for null values - - private final Character quoteCharacter; // null if quoting is disabled - - private final String quotedNullString; - - private final QuoteMode quoteMode; - - private final String recordSeparator; // for outputs - - private final boolean skipHeaderRecord; - - private final boolean trailingDelimiter; - - private final boolean trim; - - /** - * Creates a customized CSV format. 
- * - * @param delimiter - * the char used for value separation, must not be a line break character - * @param quoteChar - * the Character used as value encapsulation marker, may be {@code null} to disable - * @param quoteMode - * the quote mode - * @param commentStart - * the Character used for comment identification, may be {@code null} to disable - * @param escape - * the Character used to escape special characters in values, may be {@code null} to disable - * @param ignoreSurroundingSpaces - * {@code true} when whitespaces enclosing values should be ignored - * @param ignoreEmptyLines - * {@code true} when the parser should skip empty lines - * @param recordSeparator - * the line separator to use for output - * @param nullString - * the String to convert to and from {@code null}, may be {@code null} for no substitution - * @param headerComments - * the comments to be printed by the Printer before the actual CSV data - * @param header - * the header - * @param skipHeaderRecord - * whether to skip the header record - * @param allowMissingColumnNames - * {@code true} if missing column names are allowed when parsing the header line - * @param ignoreHeaderCase - * {@code true} if header names are accessed ignoring case - * @param trim - * whether to trim leading and trailing blanks - * @param trailingDelimiter - * whether to add a trailing delimiter - * @param autoFlush - * whether to flush on close - * @param allowDuplicateHeaderNames - * whether duplicate names are allowed in the headers - * @throws IllegalArgumentException - * if the delimiter is a line break character - */ - private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode, - final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces, - final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, - final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, - final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim, - final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) { - this.delimiter = delimiter; - this.quoteCharacter = quoteChar; - this.quoteMode = quoteMode; - this.commentMarker = commentStart; - this.escapeCharacter = escape; - this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; - 
this.allowMissingColumnNames = allowMissingColumnNames; - this.ignoreEmptyLines = ignoreEmptyLines; - this.recordSeparator = recordSeparator; - this.nullString = nullString; - this.headerComments = toStringArray(headerComments); - this.header = header == null ? null : header.clone(); - this.skipHeaderRecord = skipHeaderRecord; - this.ignoreHeaderCase = ignoreHeaderCase; - this.trailingDelimiter = trailingDelimiter; - this.trim = trim; - this.autoFlush = autoFlush; - this.quotedNullString = quoteCharacter + nullString + quoteCharacter; - this.allowDuplicateHeaderNames = allowDuplicateHeaderNames; - validate(); - } - - @Override - public boolean equals(final Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - - final CSVFormat other = (CSVFormat) obj; - if (delimiter != other.delimiter) { - return false; - } - if (trailingDelimiter != other.trailingDelimiter) { - return false; - } - if (autoFlush != other.autoFlush) { - return false; - } - if (trim != other.trim) { - return false; - } - if (allowMissingColumnNames != other.allowMissingColumnNames) { - return false; - } - if (allowDuplicateHeaderNames != other.allowDuplicateHeaderNames) { - return false; - } - if (ignoreHeaderCase != other.ignoreHeaderCase) { - return false; - } - if (quoteMode != other.quoteMode) { - return false; - } - if (quoteCharacter == null) { - if (other.quoteCharacter != null) { - return false; - } - } else if (!quoteCharacter.equals(other.quoteCharacter)) { - return false; - } - if (commentMarker == null) { - if (other.commentMarker != null) { - return false; - } - } else if (!commentMarker.equals(other.commentMarker)) { - return false; - } - if (escapeCharacter == null) { - if (other.escapeCharacter != null) { - return false; - } - } else if (!escapeCharacter.equals(other.escapeCharacter)) { - return false; - } - if (nullString == null) { - if (other.nullString != null) { - return false; - 
} - } else if (!nullString.equals(other.nullString)) { - return false; - } - if (!Arrays.equals(header, other.header)) { - return false; - } - if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) { - return false; - } - if (ignoreEmptyLines != other.ignoreEmptyLines) { - return false; - } - if (skipHeaderRecord != other.skipHeaderRecord) { - return false; - } - if (recordSeparator == null) { - if (other.recordSeparator != null) { - return false; - } - } else if (!recordSeparator.equals(other.recordSeparator)) { - return false; - } - if (!Arrays.equals(headerComments, other.headerComments)) { - return false; - } - return true; - } - - /** - * Formats the specified values. - * - * @param values - * the values to format - * @return the formatted values as a single record, without the trailing record separator - */ - public String format(final Object... values) { - final StringWriter out = new StringWriter(); - try (CSVPrinter csvPrinter = new CSVPrinter(out, this)) { - csvPrinter.printRecord(values); - String res = out.toString(); - int len = recordSeparator != null ? res.length() - recordSeparator.length() : res.length(); - return res.substring(0, len); - } catch (final IOException e) { - // should not happen because a StringWriter does not do IO. - throw new IllegalStateException(e); - } - } - - /** - * Returns true if and only if duplicate names are allowed in the headers. - * - * @return whether duplicate header names are allowed - * @since 1.7 - */ - public boolean getAllowDuplicateHeaderNames() { - return allowDuplicateHeaderNames; - } - - /** - * Specifies whether missing column names are allowed when parsing the header line. - * - * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an - * {@link IllegalArgumentException}. - */ - public boolean getAllowMissingColumnNames() { - return allowMissingColumnNames; - } - - /** - * Returns whether to flush on close. - * - * @return whether to flush on close. 
- * @since 1.6 - */ - public boolean getAutoFlush() { - return autoFlush; - } - - /** - * Returns the character marking the start of a line comment. - * - * @return the comment start marker, may be {@code null} - */ - public Character getCommentMarker() { - return commentMarker; - } - - /** - * Returns the character delimiting the values (typically ';', ',' or '\t'). - * - * @return the delimiter character - */ - public char getDelimiter() { - return delimiter; - } - - /** - * Returns the escape character. - * - * @return the escape character, may be {@code null} - */ - public Character getEscapeCharacter() { - return escapeCharacter; - } - - /** - * Returns a copy of the header array. - * - * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file - */ - public String[] getHeader() { - return header != null ? header.clone() : null; - } - - /** - * Returns a copy of the header comment array. - * - * @return a copy of the header comment array; {@code null} if disabled. - */ - public String[] getHeaderComments() { - return headerComments != null ? headerComments.clone() : null; - } - - /** - * Specifies whether empty lines between records are ignored when parsing input. - * - * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty - * records. - */ - public boolean getIgnoreEmptyLines() { - return ignoreEmptyLines; - } - - /** - * Specifies whether header names will be accessed ignoring case. - * - * @return {@code true} if header names cases are ignored, {@code false} if they are case sensitive. - * @since 1.3 - */ - public boolean getIgnoreHeaderCase() { - return ignoreHeaderCase; - } - - /** - * Specifies whether spaces around values are ignored when parsing input. - * - * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value. 
- */ - public boolean getIgnoreSurroundingSpaces() { - return ignoreSurroundingSpaces; - } - - /** - * Gets the String to convert to and from {@code null}. - * - * - * @return the String to convert to and from {@code null}. No substitution occurs if {@code null} - */ - public String getNullString() { - return nullString; - } - - /** - * Returns the character used to encapsulate values containing special characters. - * - * @return the quoteChar character, may be {@code null} - */ - public Character getQuoteCharacter() { - return quoteCharacter; - } - - /** - * Returns the quote policy for output fields. - * - * @return the quote policy - */ - public QuoteMode getQuoteMode() { - return quoteMode; - } - - /** - * Returns the record separator delimiting output records. - * - * @return the record separator - */ - public String getRecordSeparator() { - return recordSeparator; - } - - /** - * Returns whether to skip the header record. - * - * @return whether to skip the header record. - */ - public boolean getSkipHeaderRecord() { - return skipHeaderRecord; - } - - /** - * Returns whether to add a trailing delimiter. - * - * @return whether to add a trailing delimiter. - * @since 1.3 - */ - public boolean getTrailingDelimiter() { - return trailingDelimiter; - } - - /** - * Returns whether to trim leading and trailing blanks. - * This is used by {@link #print(Object, Appendable, boolean)} - * Also by {@link CSVParser#addRecordValue(boolean)} - * - * @return whether to trim leading and trailing blanks. - */ - public boolean getTrim() { - return trim; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - - result = prime * result + delimiter; - result = prime * result + ((quoteMode == null) ? 0 : quoteMode.hashCode()); - result = prime * result + ((quoteCharacter == null) ? 0 : quoteCharacter.hashCode()); - result = prime * result + ((commentMarker == null) ? 0 : commentMarker.hashCode()); - result = prime * result + ((escapeCharacter == null) ? 
0 : escapeCharacter.hashCode()); - result = prime * result + ((nullString == null) ? 0 : nullString.hashCode()); - result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237); - result = prime * result + (ignoreHeaderCase ? 1231 : 1237); - result = prime * result + (ignoreEmptyLines ? 1231 : 1237); - result = prime * result + (skipHeaderRecord ? 1231 : 1237); - result = prime * result + (allowDuplicateHeaderNames ? 1231 : 1237); - result = prime * result + (trim ? 1231 : 1237); - result = prime * result + (autoFlush ? 1231 : 1237); - result = prime * result + (trailingDelimiter ? 1231 : 1237); - result = prime * result + (allowMissingColumnNames ? 1231 : 1237); - result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode()); - result = prime * result + Arrays.hashCode(header); - result = prime * result + Arrays.hashCode(headerComments); - return result; - } - - /** - * Specifies whether comments are supported by this format. - * - * Note that the comment introducer character is only recognized at the start of a line. - * - * @return {@code true} if comments are supported, {@code false} otherwise - */ - public boolean isCommentMarkerSet() { - return commentMarker != null; - } - - /** - * Returns whether escapes are being processed. - * - * @return {@code true} if escapes are processed - */ - public boolean isEscapeCharacterSet() { - return escapeCharacter != null; - } - - /** - * Returns whether a nullString has been defined. - * - * @return {@code true} if a nullString is defined - */ - public boolean isNullStringSet() { - return nullString != null; - } - - /** - * Returns whether a quoteChar has been defined. - * - * @return {@code true} if a quoteChar is defined - */ - public boolean isQuoteCharacterSet() { - return quoteCharacter != null; - } - - /** - * Parses the specified content. - * - *

- * See also the various static parse methods on {@link CSVParser}. - *

- * - * @param in - * the input stream - * @return a parser over a stream of {@link CSVRecord}s. - * @throws IOException - * If an I/O error occurs - */ - public CSVParser parse(final Reader in) throws IOException { - return new CSVParser(in, this); - } - - /** - * Prints to the specified output. - * - *

- * See also {@link CSVPrinter}. - *

- * - * @param out - * the output. - * @return a printer to an output. - * @throws IOException - * thrown if the optional header cannot be printed. - */ - public CSVPrinter print(final Appendable out) throws IOException { - return new CSVPrinter(out, this); - } - - /** - * Prints to the specified output. - * - *

- * See also {@link CSVPrinter}. - *

- * - * @param out - * the output. - * @param charset - * A charset. - * @return a printer to an output. - * @throws IOException - * thrown if the optional header cannot be printed. - * @since 1.5 - */ - @SuppressWarnings("resource") - public CSVPrinter print(final File out, final Charset charset) throws IOException { - // The writer will be closed when close() is called. - return new CSVPrinter(new OutputStreamWriter(new FileOutputStream(out), charset), this); - } - - /** - * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated - * as needed. Useful when one wants to avoid creating CSVPrinters. - * Trims the value if {@link #getTrim()} is true. - * - * @param value - * value to output. - * @param out - * where to print the value. - * @param newRecord - * {@code true} if this is the first value of a new record; otherwise the delimiter is written first. - * @throws IOException - * If an I/O error occurs. - * @since 1.4 - */ - public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { - // null values are considered empty - // Only call CharSequence.toString() if you have to, helps GC-free use cases. - CharSequence charSequence; - if (value == null) { - // https://issues.apache.org/jira/browse/CSV-203 - if (null == nullString) { - charSequence = EMPTY; - } else { - if (QuoteMode.ALL == quoteMode) { - charSequence = quotedNullString; - } else { - charSequence = nullString; - } - } - } else { - if (value instanceof CharSequence) { - charSequence = (CharSequence) value; - } else if (value instanceof Reader) { - print((Reader) value, out, newRecord); - return; - } else { - charSequence = value.toString(); - } - } - charSequence = getTrim() ? 
trim(charSequence) : charSequence; - print(value, charSequence, out, newRecord); - } - - private void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) - throws IOException { - final int offset = 0; - final int len = value.length(); - if (!newRecord) { - out.append(getDelimiter()); - } - if (object == null) { - out.append(value); - } else if (isQuoteCharacterSet()) { - // the original object is needed so can check for Number - printWithQuotes(object, value, out, newRecord); - } else if (isEscapeCharacterSet()) { - printWithEscapes(value, out); - } else { - out.append(value, offset, len); - } - } - - /** - * Prints to the specified output, returns a {@code CSVPrinter} which the caller MUST close. - * - *

- * See also {@link CSVPrinter}. - *

- * - * @param out the output. - * @param charset A charset. - * @return a printer to an output. - * @throws IOException thrown if the optional header cannot be printed. - * @since 1.5 - */ - @SuppressWarnings("resource") - public CSVPrinter print(final Path out, final Charset charset) throws IOException { - return print(Files.newBufferedWriter(out, charset)); - } - - private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException { - // Reader is never null - if (!newRecord) { - out.append(getDelimiter()); - } - if (isQuoteCharacterSet()) { - printWithQuotes(reader, out); - } else if (isEscapeCharacterSet()) { - printWithEscapes(reader, out); - } else if (out instanceof Writer) { - IOUtils.copyLarge(reader, (Writer) out); - } else { - IOUtils.copy(reader, out); - } - - } - - /** - * Prints to the {@link System#out}. - * - *

- * See also {@link CSVPrinter}. - *

- * - * @return a printer to {@link System#out}. - * @throws IOException - * thrown if the optional header cannot be printed. - * @since 1.5 - */ - public CSVPrinter printer() throws IOException { - return new CSVPrinter(System.out, this); - } - - /** - * Outputs the trailing delimiter (if set) followed by the record separator (if set). - * - * @param out - * where to write - * @throws IOException - * If an I/O error occurs - * @since 1.4 - */ - public void println(final Appendable out) throws IOException { - if (getTrailingDelimiter()) { - out.append(getDelimiter()); - } - if (recordSeparator != null) { - out.append(recordSeparator); - } - } - - /** - * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the - * record separator. - * - *

- * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}. - *

- * - * @param out - * where to write. - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs. - * @since 1.4 - */ - public void printRecord(final Appendable out, final Object... values) throws IOException { - for (int i = 0; i < values.length; i++) { - print(values[i], out, i == 0); - } - println(out); - } - - /* - * Note: must only be called if escaping is enabled, otherwise will generate NPE - */ - private void printWithEscapes(final CharSequence value, final Appendable out) throws IOException { - int start = 0; - int pos = 0; - final int len = value.length(); - final int end = len; - - final char delim = getDelimiter(); - final char escape = getEscapeCharacter().charValue(); - - while (pos < end) { - char c = value.charAt(pos); - if (c == CR || c == LF || c == delim || c == escape) { - // write out segment up until this char - if (pos > start) { - out.append(value, start, pos); - } - if (c == LF) { - c = 'n'; - } else if (c == CR) { - c = 'r'; - } - - out.append(escape); - out.append(c); - - start = pos + 1; // start on the current char after this one - } - pos++; - } - - // write last segment - if (pos > start) { - out.append(value, start, pos); - } - } - - private void printWithEscapes(final Reader reader, final Appendable out) throws IOException { - int start = 0; - int pos = 0; - - final char delim = getDelimiter(); - final char escape = getEscapeCharacter().charValue(); - final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); - - int c; - while (-1 != (c = reader.read())) { - builder.append((char) c); - if (c == CR || c == LF || c == delim || c == escape) { - // write out segment up until this char - if (pos > start) { - out.append(builder.substring(start, pos)); - builder.setLength(0); - pos = -1; - } - if (c == LF) { - c = 'n'; - } else if (c == CR) { - c = 'r'; - } - - out.append(escape); - out.append((char) c); - - start = pos + 1; // start on the current char after this one - } - pos++; - } - 
- // write last segment - if (pos > start) { - out.append(builder.substring(start, pos)); - } - } - - /* - * Note: must only be called if quoting is enabled, otherwise will generate NPE - */ - // the original object is needed so can check for Number - private void printWithQuotes(final Object object, final CharSequence value, final Appendable out, - final boolean newRecord) throws IOException { - boolean quote = false; - int start = 0; - int pos = 0; - final int len = value.length(); - final int end = len; - - final char delimChar = getDelimiter(); - final char quoteChar = getQuoteCharacter().charValue(); - // If escape char not specified, default to the quote char - // This avoids having to keep checking whether there is an escape character - // at the cost of checking against quote twice - final char escapeChar = isEscapeCharacterSet() ? getEscapeCharacter().charValue() : quoteChar; - - QuoteMode quoteModePolicy = getQuoteMode(); - if (quoteModePolicy == null) { - quoteModePolicy = QuoteMode.MINIMAL; - } - switch (quoteModePolicy) { - case ALL: - case ALL_NON_NULL: - quote = true; - break; - case NON_NUMERIC: - quote = !(object instanceof Number); - break; - case NONE: - // Use the existing escaping code - printWithEscapes(value, out); - return; - case MINIMAL: - if (len <= 0) { - // always quote an empty token that is the first - // on the line, as it may be the only thing on the - // line. If it were not quoted in that case, - // an empty line has no tokens. - if (newRecord) { - quote = true; - } - } else { - char c = value.charAt(pos); - - if (c <= COMMENT) { - // Some other chars at the start of a value caused the parser to fail, so for now - // encapsulate if we start in anything less than '#'. We are being conservative - // by including the default comment char too. 
- quote = true; - } else { - while (pos < end) { - c = value.charAt(pos); - if (c == LF || c == CR || c == quoteChar || c == delimChar || c == escapeChar) { - quote = true; - break; - } - pos++; - } - - if (!quote) { - pos = end - 1; - c = value.charAt(pos); - // Some other chars at the end caused the parser to fail, so for now - // encapsulate if we end in anything less than ' ' - if (c <= SP) { - quote = true; - } - } - } - } - - if (!quote) { - // no encapsulation needed - write out the original value - out.append(value, start, end); - return; - } - break; - default: - throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); - } - - if (!quote) { - // no encapsulation needed - write out the original value - out.append(value, start, end); - return; - } - - // we hit something that needed encapsulation - out.append(quoteChar); - - // Pick up where we left off: pos should be positioned on the first character that caused - // the need for encapsulation. - while (pos < end) { - final char c = value.charAt(pos); - if (c == quoteChar || c == escapeChar) { - // write out the chunk up until this point - out.append(value, start, pos); - out.append(escapeChar); // now output the escape - start = pos; // and restart with the matched char - } - pos++; - } - - // write the last segment - out.append(value, start, pos); - out.append(quoteChar); - } - - /** - * Always use quotes unless QuoteMode is NONE, so we not have to look ahead. 
- * - * @throws IOException - */ - private void printWithQuotes(final Reader reader, final Appendable out) throws IOException { - - if (getQuoteMode() == QuoteMode.NONE) { - printWithEscapes(reader, out); - return; - } - - int pos = 0; - - final char quote = getQuoteCharacter().charValue(); - final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); - - out.append(quote); - - int c; - while (-1 != (c = reader.read())) { - builder.append((char) c); - if (c == quote) { - // write out segment up until this char - if (pos > 0) { - out.append(builder.substring(0, pos)); - builder.setLength(0); - pos = -1; - } - - out.append(quote); - out.append((char) c); - } - pos++; - } - - // write last segment - if (pos > 0) { - out.append(builder.substring(0, pos)); - } - - out.append(quote); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("Delimiter=<").append(delimiter).append('>'); - if (isEscapeCharacterSet()) { - sb.append(' '); - sb.append("Escape=<").append(escapeCharacter).append('>'); - } - if (isQuoteCharacterSet()) { - sb.append(' '); - sb.append("QuoteChar=<").append(quoteCharacter).append('>'); - } - if (quoteMode != null) { - sb.append(' '); - sb.append("QuoteMode=<").append(quoteMode).append('>'); - } - if (isCommentMarkerSet()) { - sb.append(' '); - sb.append("CommentStart=<").append(commentMarker).append('>'); - } - if (isNullStringSet()) { - sb.append(' '); - sb.append("NullString=<").append(nullString).append('>'); - } - if (recordSeparator != null) { - sb.append(' '); - sb.append("RecordSeparator=<").append(recordSeparator).append('>'); - } - if (getIgnoreEmptyLines()) { - sb.append(" EmptyLines:ignored"); - } - if (getIgnoreSurroundingSpaces()) { - sb.append(" SurroundingSpaces:ignored"); - } - if (getIgnoreHeaderCase()) { - sb.append(" IgnoreHeaderCase:ignored"); - } - sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); - if (headerComments != null) { - sb.append(' '); - 
sb.append("HeaderComments:").append(Arrays.toString(headerComments)); - } - if (header != null) { - sb.append(' '); - sb.append("Header:").append(Arrays.toString(header)); - } - return sb.toString(); - } - - private String[] toStringArray(final Object[] values) { - if (values == null) { - return null; - } - final String[] strings = new String[values.length]; - for (int i = 0; i < values.length; i++) { - final Object value = values[i]; - strings[i] = value == null ? null : value.toString(); - } - return strings; - } - - private CharSequence trim(final CharSequence charSequence) { - if (charSequence instanceof String) { - return ((String) charSequence).trim(); - } - final int count = charSequence.length(); - int len = count; - int pos = 0; - - while (pos < len && charSequence.charAt(pos) <= SP) { - pos++; - } - while (pos < len && charSequence.charAt(len - 1) <= SP) { - len--; - } - return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; - } - - /** - * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary. 
- * - * @throws IllegalArgumentException - */ - private void validate() throws IllegalArgumentException { - if (isLineBreak(delimiter)) { - throw new IllegalArgumentException("The delimiter cannot be a line break"); - } - - if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) { - throw new IllegalArgumentException( - "The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); - } - - if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) { - throw new IllegalArgumentException( - "The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); - } - - if (commentMarker != null && delimiter == commentMarker.charValue()) { - throw new IllegalArgumentException( - "The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); - } - - if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { - throw new IllegalArgumentException( - "The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')"); - } - - if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { - throw new IllegalArgumentException( - "The comment start and the escape character cannot be the same ('" + commentMarker + "')"); - } - - if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { - throw new IllegalArgumentException("No quotes mode set but no escape character is set"); - } - - // validate header - if (header != null && !allowDuplicateHeaderNames) { - final Set dupCheck = new HashSet<>(); - for (final String hdr : header) { - if (!dupCheck.add(hdr)) { - throw new IllegalArgumentException( - "The header contains a duplicate entry: '" + hdr + "' in " + Arrays.toString(header)); - } - } - } - } - - /** - * Returns a new {@code CSVFormat} that allows duplicate header names. 
- * - * @return a new {@code CSVFormat} that allows duplicate header names - * @since 1.7 - */ - public CSVFormat withAllowDuplicateHeaderNames() { - return withAllowDuplicateHeaderNames(true); - } - - /** - * Returns a new {@code CSVFormat} with duplicate header names behavior set to the given value. - * - * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. - * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value. - * @since 1.7 - */ - public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true} - * - * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. - * @see #withAllowMissingColumnNames(boolean) - * @since 1.1 - */ - public CSVFormat withAllowMissingColumnNames() { - return this.withAllowMissingColumnNames(true); - } - - /** - * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to the given value. - * - * @param allowMissingColumnNames - * the missing column names behavior, {@code true} to allow missing column names in the header line, - * {@code false} to cause an {@link IllegalArgumentException} to be thrown. - * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 
- */ - public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with whether to flush on close. - * - * @param autoFlush - * whether to flush on close. - * - * @return A new CSVFormat that is equal to this but with the specified autoFlush setting. - * @since 1.6 - */ - public CSVFormat withAutoFlush(final boolean autoFlush) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. - * - * Note that the comment start character is only recognized at the start of a line. - * - * @param commentMarker - * the comment start marker - * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withCommentMarker(final char commentMarker) { - return withCommentMarker(Character.valueOf(commentMarker)); - } - - /** - * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. - * - * Note that the comment start character is only recognized at the start of a line. 
- * - * @param commentMarker - * the comment start marker, use {@code null} to disable - * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withCommentMarker(final Character commentMarker) { - if (isLineBreak(commentMarker)) { - throw new IllegalArgumentException("The comment start marker character cannot be a line break"); - } - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the delimiter of the format set to the specified character. - * - * @param delimiter - * the delimiter character - * @return A new CSVFormat that is equal to this with the specified character as delimiter - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withDelimiter(final char delimiter) { - if (isLineBreak(delimiter)) { - throw new IllegalArgumentException("The delimiter cannot be a line break"); - } - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. 
- * - * @param escape - * the escape character - * @return A new CSVFormat that is equal to his but with the specified character as the escape character - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withEscape(final char escape) { - return withEscape(Character.valueOf(escape)); - } - - /** - * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. - * - * @param escape - * the escape character, use {@code null} to disable - * @return A new CSVFormat that is equal to this but with the specified character as the escape character - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withEscape(final Character escape) { - if (isLineBreak(escape)) { - throw new IllegalArgumentException("The escape character cannot be a line break"); - } - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces, - ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, - allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} using the first record as header. - * - *

- * Calling this method is equivalent to calling: - *

- * - *
-     * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord();
-     * 
- * - * @return A new CSVFormat that is equal to this but using the first record as header. - * @see #withSkipHeaderRecord(boolean) - * @see #withHeader(String...) - * @since 1.3 - */ - public CSVFormat withFirstRecordAsHeader() { - return withHeader().withSkipHeaderRecord(); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format defined by the enum class. - * - *

- * Example: - *

- * - *
-     * public enum Header {
-     *     Name, Email, Phone
-     * }
-     *
-     * CSVFormat format = aformat.withHeader(Header.class);
-     * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param headerEnum - * the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified - * otherwise. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @see #withHeader(String...) - * @see #withSkipHeaderRecord(boolean) - * @since 1.3 - */ - public CSVFormat withHeader(final Class> headerEnum) { - String[] header = null; - if (headerEnum != null) { - final Enum[] enumValues = headerEnum.getEnumConstants(); - header = new String[enumValues.length]; - for (int i = 0; i < enumValues.length; i++) { - header[i] = enumValues[i].name(); - } - } - return withHeader(header); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can - * either be parsed automatically from the input file with: - * - *
-     * CSVFormat format = aformat.withHeader();
-     * 
- * - * or specified manually with: - * - *
-     * CSVFormat format = aformat.withHeader(resultSet);
-     * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param resultSet - * the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified - * otherwise. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @throws SQLException - * SQLException if a database access error occurs or this method is called on a closed result set. - * @since 1.1 - */ - public CSVFormat withHeader(final ResultSet resultSet) throws SQLException { - return withHeader(resultSet != null ? resultSet.getMetaData() : null); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can - * either be parsed automatically from the input file with: - * - *
-     * CSVFormat format = aformat.withHeader();
-     * 
- * - * or specified manually with: - * - *
-     * CSVFormat format = aformat.withHeader(metaData);
-     * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param metaData - * the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified - * otherwise. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @throws SQLException - * SQLException if a database access error occurs or this method is called on a closed result set. - * @since 1.1 - */ - public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLException { - String[] labels = null; - if (metaData != null) { - final int columnCount = metaData.getColumnCount(); - labels = new String[columnCount]; - for (int i = 0; i < columnCount; i++) { - labels[i] = metaData.getColumnLabel(i + 1); - } - } - return withHeader(labels); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format set to the given values. The header can either be - * parsed automatically from the input file with: - * - *
-     * CSVFormat format = aformat.withHeader();
-     * 
- * - * or specified manually with: - * - *
-     * CSVFormat format = aformat.withHeader("name", "email", "phone");
-     * 
- *

- * The header is also used by the {@link CSVPrinter}. - *

- * - * @param header - * the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @see #withSkipHeaderRecord(boolean) - */ - public CSVFormat withHeader(final String... header) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will - * be printed first, before the headers. This setting is ignored by the parser. - * - *
-     * CSVFormat format = aformat.withHeaderComments("Generated by Apache Commons CSV 1.1.", new Date());
-     * 
- * - * @param headerComments - * the headerComments which will be printed by the Printer before the actual CSV data. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @see #withSkipHeaderRecord(boolean) - * @since 1.1 - */ - public CSVFormat withHeaderComments(final Object... headerComments) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}. - * - * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. - * @since {@link #withIgnoreEmptyLines(boolean)} - * @since 1.1 - */ - public CSVFormat withIgnoreEmptyLines() { - return this.withIgnoreEmptyLines(true); - } - - /** - * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value. - * - * @param ignoreEmptyLines - * the empty line skipping behavior, {@code true} to ignore the empty lines between the records, - * {@code false} to translate empty lines to empty records. - * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. - */ - public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. 
- * - * @return A new CSVFormat that will ignore case header name. - * @see #withIgnoreHeaderCase(boolean) - * @since 1.3 - */ - public CSVFormat withIgnoreHeaderCase() { - return this.withIgnoreHeaderCase(true); - } - - /** - * Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case. - * - * @param ignoreHeaderCase - * the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as - * is. - * @return A new CSVFormat that will ignore case header name if specified as {@code true} - * @since 1.3 - */ - public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}. - * - * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior. - * @see #withIgnoreSurroundingSpaces(boolean) - * @since 1.1 - */ - public CSVFormat withIgnoreSurroundingSpaces() { - return this.withIgnoreSurroundingSpaces(true); - } - - /** - * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value. - * - * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, - * {@code false} to leave the spaces as is. - * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 
- */ - public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output. - * - * - * @param nullString - * the String to convert to and from {@code null}. No substitution occurs if {@code null} - * - * @return A new CSVFormat that is equal to this but with the specified null conversion string. - */ - public CSVFormat withNullString(final String nullString) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. - * - * @param quoteChar - * the quoteChar character - * @return A new CSVFormat that is equal to this but with the specified character as quoteChar - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withQuote(final char quoteChar) { - return withQuote(Character.valueOf(quoteChar)); - } - - /** - * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 
- * - * @param quoteChar - * the quoteChar character, use {@code null} to disable - * @return A new CSVFormat that is equal to this but with the specified character as quoteChar - * @throws IllegalArgumentException - * thrown if the specified character is a line break - */ - public CSVFormat withQuote(final Character quoteChar) { - if (isLineBreak(quoteChar)) { - throw new IllegalArgumentException("The quoteChar cannot be a line break"); - } - return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, - ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, - allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the output quote policy of the format set to the specified value. - * - * @param quoteModePolicy - * the quote policy to use for output. - * - * @return A new CSVFormat that is equal to this but with the specified quote policy - */ - public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) { - return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the specified character. - * - *

- * Note: This setting is only used during printing and does not affect parsing. Parsing currently - * only works for inputs with '\n', '\r' and "\r\n" - *

- * - * @param recordSeparator - * the record separator to use for output. - * - * @return A new CSVFormat that is equal to this but with the specified output record separator - */ - public CSVFormat withRecordSeparator(final char recordSeparator) { - return withRecordSeparator(String.valueOf(recordSeparator)); - } - - /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String. - * - *

- * Note: This setting is only used during printing and does not affect parsing. Parsing currently - * only works for inputs with '\n', '\r' and "\r\n" - *

- * - * @param recordSeparator - * the record separator to use for output. - * - * @return A new CSVFormat that is equal to this but with the specified output record separator - * @throws IllegalArgumentException - * if recordSeparator is none of CR, LF or CRLF - */ - public CSVFormat withRecordSeparator(final String recordSeparator) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with skipping the header record set to {@code true}. - * - * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. - * @see #withSkipHeaderRecord(boolean) - * @see #withHeader(String...) - * @since 1.1 - */ - public CSVFormat withSkipHeaderRecord() { - return this.withSkipHeaderRecord(true); - } - - /** - * Returns a new {@code CSVFormat} with whether to skip the header record. - * - * @param skipHeaderRecord - * whether to skip the header record. - * - * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. - * @see #withHeader(String...) - */ - public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the operating system's line - * separator string, typically CR+LF on Windows and LF on Linux. - * - *

- * Note: This setting is only used during printing and does not affect parsing. Parsing currently - * only works for inputs with '\n', '\r' and "\r\n" - *

- * - * @return A new CSVFormat that is equal to this but with the operating system's line separator string. - * @since 1.6 - */ - public CSVFormat withSystemRecordSeparator() { - return withRecordSeparator(System.getProperty("line.separator")); - } - - /** - * Returns a new {@code CSVFormat} to add a trailing delimiter. - * - * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. - * @since 1.3 - */ - public CSVFormat withTrailingDelimiter() { - return withTrailingDelimiter(true); - } - - /** - * Returns a new {@code CSVFormat} with whether to add a trailing delimiter. - * - * @param trailingDelimiter - * whether to add a trailing delimiter. - * - * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. - * @since 1.3 - */ - public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} to trim leading and trailing blanks. - * See {@link #getTrim()} for details of where this is used. - * - * @return A new CSVFormat that is equal to this but with the trim setting on. - * @since 1.3 - */ - public CSVFormat withTrim() { - return withTrim(true); - } - - /** - * Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks. - * See {@link #getTrim()} for details of where this is used. - * - * @param trim - * whether to trim leading and trailing blanks. - * - * @return A new CSVFormat that is equal to this but with the specified trim setting. 
- * @since 1.3 - */ - public CSVFormat withTrim(final boolean trim) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } -} diff --git a/src/test/resources/org/apache/commons/csv/CSVParser.java b/src/test/resources/org/apache/commons/csv/CSVParser.java deleted file mode 100644 index bf6eb6d6..00000000 --- a/src/test/resources/org/apache/commons/csv/CSVParser.java +++ /dev/null @@ -1,715 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Token.Type.TOKEN; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.io.StringReader; -import java.net.URL; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Objects; -import java.util.TreeMap; - -/** - * Parses CSV files according to the specified format. - * - * Because CSV appears in many different dialects, the parser supports many formats by allowing the - * specification of a {@link CSVFormat}. - * - * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream. - * - *

Creating instances

- *

- * There are several static factory methods that can be used to create instances for various types of resources: - *

- * - *

- * Alternatively parsers can also be created by passing a {@link Reader} directly to the sole constructor. - * - * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut: - *

- *
- * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) {
- *     ...
- * }
- * 
- * - *

Parsing record wise

- *

- * To parse a CSV input from a file, you write: - *

- * - *
- * File csvData = new File("/path/to/csv");
- * CSVParser parser = CSVParser.parse(csvData, CSVFormat.RFC4180);
- * for (CSVRecord csvRecord : parser) {
- *     ...
- * }
- * 
- * - *

- * This will read the parse the contents of the file using the - * RFC 4180 format. - *

- * - *

- * To parse CSV input in a format like Excel, you write: - *

- * - *
- * CSVParser parser = CSVParser.parse(csvData, CSVFormat.EXCEL);
- * for (CSVRecord csvRecord : parser) {
- *     ...
- * }
- * 
- * - *

- * If the predefined formats don't match the format at hands, custom formats can be defined. More information about - * customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}. - *

- * - *

Parsing into memory

- *

- * If parsing record wise is not desired, the contents of the input can be read completely into memory. - *

- * - *
- * Reader in = new StringReader("a;b\nc;d");
- * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
- * List<CSVRecord> list = parser.getRecords();
- * 
- * - *

- * There are two constraints that have to be kept in mind: - *

- * - *
    - *
  1. Parsing into memory starts at the current position of the parser. If you have already parsed records from - * the input, those records will not end up in the in memory representation of your CSV data.
  2. - *
  3. Parsing into memory may consume a lot of system resources depending on the input. For example if you're - * parsing a 150MB file of CSV data the contents will be read completely into memory.
  4. - *
- * - *

Notes

- *

- * Internal parser state is completely covered by the format and the reader-state. - *

- * - * @see package documentation for more details - */ -public final class CSVParser implements Iterable, Closeable { - - class CSVRecordIterator implements Iterator { - private CSVRecord current; - - private CSVRecord getNextRecord() { - try { - return CSVParser.this.nextRecord(); - } catch (final IOException e) { - throw new IllegalStateException( - e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); - } - } - - @Override - public boolean hasNext() { - if (CSVParser.this.isClosed()) { - return false; - } - if (this.current == null) { - this.current = this.getNextRecord(); - } - - return this.current != null; - } - - @Override - public CSVRecord next() { - if (CSVParser.this.isClosed()) { - throw new NoSuchElementException("CSVParser has been closed"); - } - CSVRecord next = this.current; - this.current = null; - - if (next == null) { - // hasNext() wasn't called before - next = this.getNextRecord(); - if (next == null) { - throw new NoSuchElementException("No more CSV records available"); - } - } - - return next; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - } - - /** - * Header information based on name and position. - */ - private static final class Headers { - /** - * Header column positions (0-based) - */ - final Map headerMap; - - /** - * Header names in column order - */ - final List headerNames; - - Headers(final Map headerMap, final List headerNames) { - this.headerMap = headerMap; - this.headerNames = headerNames; - } - } - - /** - * Creates a parser for the given {@link File}. - * - * @param file - * a CSV file. Must not be null. - * @param charset - * The Charset to decode the given file. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new parser - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either file or format are null. 
- * @throws IOException - * If an I/O error occurs - */ - @SuppressWarnings("resource") - public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException { - Objects.requireNonNull(file, "file"); - Objects.requireNonNull(format, "format"); - return new CSVParser(new InputStreamReader(new FileInputStream(file), charset), format); - } - - /** - * Creates a CSV parser using the given {@link CSVFormat}. - * - *

- * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, - * unless you close the {@code reader}. - *

- * - * @param inputStream - * an InputStream containing CSV-formatted input. Must not be null. - * @param charset - * The Charset to decode the given file. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new CSVParser configured with the given reader and format. - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. - * @throws IOException - * If there is a problem reading the header or skipping the first record - * @since 1.5 - */ - @SuppressWarnings("resource") - public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format) - throws IOException { - Objects.requireNonNull(inputStream, "inputStream"); - Objects.requireNonNull(format, "format"); - return parse(new InputStreamReader(inputStream, charset), format); - } - - /** - * Creates and returns a parser for the given {@link Path}, which the caller MUST close. - * - * @param path - * a CSV file. Must not be null. - * @param charset - * The Charset to decode the given file. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new parser - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either file or format are null. - * @throws IOException - * If an I/O error occurs - * @since 1.5 - */ - @SuppressWarnings("resource") - public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException { - Objects.requireNonNull(path, "path"); - Objects.requireNonNull(format, "format"); - return parse(Files.newInputStream(path), charset, format); - } - - /** - * Creates a CSV parser using the given {@link CSVFormat} - * - *

- * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, - * unless you close the {@code reader}. - *

- * - * @param reader - * a Reader containing CSV-formatted input. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new CSVParser configured with the given reader and format. - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. - * @throws IOException - * If there is a problem reading the header or skipping the first record - * @since 1.5 - */ - public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException { - return new CSVParser(reader, format); - } - - // the following objects are shared to reduce garbage - - /** - * Creates a parser for the given {@link String}. - * - * @param string - * a CSV string. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new parser - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either string or format are null. - * @throws IOException - * If an I/O error occurs - */ - public static CSVParser parse(final String string, final CSVFormat format) throws IOException { - Objects.requireNonNull(string, "string"); - Objects.requireNonNull(format, "format"); - - return new CSVParser(new StringReader(string), format); - } - - /** - * Creates and returns a parser for the given URL, which the caller MUST close. - * - *

- * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless - * you close the {@code url}. - *

- * - * @param url - * a URL. Must not be null. - * @param charset - * the charset for the resource. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new parser - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either url, charset or format are null. - * @throws IOException - * If an I/O error occurs - */ - @SuppressWarnings("resource") - public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException { - Objects.requireNonNull(url, "url"); - Objects.requireNonNull(charset, "charset"); - Objects.requireNonNull(format, "format"); - - return new CSVParser(new InputStreamReader(url.openStream(), charset), format); - } - - private final CSVFormat format; - - /** A mapping of column names to column indices */ - private final Map headerMap; - - /** The column order to avoid re-computing it. */ - private final List headerNames; - - private final Lexer lexer; - - private final CSVRecordIterator csvRecordIterator; - - /** A record buffer for getRecord(). Grows as necessary and is reused. */ - private final List recordList = new ArrayList<>(); - - /** - * The next record number to assign. - */ - private long recordNumber; - - /** - * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination - * with {@link #recordNumber}. - */ - private final long characterOffset; - - private final Token reusableToken = new Token(); - - /** - * Customized CSV parser using the given {@link CSVFormat} - * - *

- * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, - * unless you close the {@code reader}. - *

- * - * @param reader - * a Reader containing CSV-formatted input. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. - * @throws IOException - * If there is a problem reading the header or skipping the first record - */ - public CSVParser(final Reader reader, final CSVFormat format) throws IOException { - this(reader, format, 0, 1); - } - - /** - * Customized CSV parser using the given {@link CSVFormat} - * - *

- * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, - * unless you close the {@code reader}. - *

- * - * @param reader - * a Reader containing CSV-formatted input. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @param characterOffset - * Lexer offset when the parser does not start parsing at the beginning of the source. - * @param recordNumber - * The next record number to assign - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. - * @throws IOException - * If there is a problem reading the header or skipping the first record - * @since 1.1 - */ - @SuppressWarnings("resource") - public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) - throws IOException { - Objects.requireNonNull(reader, "reader"); - Objects.requireNonNull(format, "format"); - - this.format = format; - this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); - this.csvRecordIterator = new CSVRecordIterator(); - final Headers headers = createHeaders(); - this.headerMap = headers.headerMap; - this.headerNames = headers.headerNames; - this.characterOffset = characterOffset; - this.recordNumber = recordNumber - 1; - } - - private void addRecordValue(final boolean lastRecord) { - final String input = this.reusableToken.content.toString(); - final String inputClean = this.format.getTrim() ? input.trim() : input; - if (lastRecord && inputClean.isEmpty() && this.format.getTrailingDelimiter()) { - return; - } - final String nullString = this.format.getNullString(); - this.recordList.add(inputClean.equals(nullString) ? null : inputClean); - } - - /** - * Closes resources. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void close() throws IOException { - if (this.lexer != null) { - this.lexer.close(); - } - } - - private Map createEmptyHeaderMap() { - return this.format.getIgnoreHeaderCase() ? 
- new TreeMap<>(String.CASE_INSENSITIVE_ORDER) : - new LinkedHashMap<>(); - } - - /** - * Creates the name to index mapping if the format defines a header. - * - * @return null if the format has no header. - * @throws IOException if there is a problem reading the header or skipping the first record - */ - private Headers createHeaders() throws IOException { - Map hdrMap = null; - List headerNames = null; - final String[] formatHeader = this.format.getHeader(); - if (formatHeader != null) { - hdrMap = createEmptyHeaderMap(); - String[] headerRecord = null; - if (formatHeader.length == 0) { - // read the header from the first line of the file - final CSVRecord nextRecord = this.nextRecord(); - if (nextRecord != null) { - headerRecord = nextRecord.values(); - } - } else { - if (this.format.getSkipHeaderRecord()) { - this.nextRecord(); - } - headerRecord = formatHeader; - } - - // build the name to index mappings - if (headerRecord != null) { - for (int i = 0; i < headerRecord.length; i++) { - final String header = headerRecord[i]; - final boolean emptyHeader = header == null || header.trim().isEmpty(); - if (emptyHeader && !this.format.getAllowMissingColumnNames()) { - throw new IllegalArgumentException( - "A header name is missing in " + Arrays.toString(headerRecord)); - } - // Note: This will always allow a duplicate header if the header is empty - final boolean containsHeader = header != null && hdrMap.containsKey(header); - if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) { - throw new IllegalArgumentException( - String.format( - "The header contains a duplicate name: \"%s\" in %s. 
If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().", - header, Arrays.toString(headerRecord))); - } - if (header != null) { - hdrMap.put(header, Integer.valueOf(i)); - if (headerNames == null) { - headerNames = new ArrayList<>(headerRecord.length); - } - headerNames.add(header); - } - } - } - } - if (headerNames == null) { - headerNames = Collections.emptyList(); //immutable - } else { - headerNames = Collections.unmodifiableList(headerNames); - } - return new Headers(hdrMap, headerNames); - } - - /** - * Returns the current line number in the input stream. - * - *

- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to - * the record number. - *

- * - * @return current line number - */ - public long getCurrentLineNumber() { - return this.lexer.getCurrentLineNumber(); - } - - /** - * Gets the first end-of-line string encountered. - * - * @return the first end-of-line string - * @since 1.5 - */ - public String getFirstEndOfLine() { - return lexer.getFirstEol(); - } - - /** - * Returns a copy of the header map. - *

- * The map keys are column names. The map values are 0-based indices. - *

- *

- * Note: The map can only provide a one-to-one mapping when the format did not - * contain null or duplicate column names. - *

- * - * @return a copy of the header map. - */ - public Map getHeaderMap() { - if (this.headerMap == null) { - return null; - } - final Map map = createEmptyHeaderMap(); - map.putAll(this.headerMap); - return map; - } - - /** - * Returns the header map. - * - * @return the header map. - */ - Map getHeaderMapRaw() { - return this.headerMap; - } - - /** - * Returns a read-only list of header names that iterates in column order. - *

- * Note: The list provides strings that can be used as keys in the header map. - * The list will not contain null column names if they were present in the input - * format. - *

- * - * @return read-only list of header names that iterates in column order. - * @see #getHeaderMap() - * @since 1.7 - */ - public List getHeaderNames() { - return headerNames; - } - - /** - * Returns the current record number in the input stream. - * - *

- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to - * the line number. - *

- * - * @return current record number - */ - public long getRecordNumber() { - return this.recordNumber; - } - - /** - * Parses the CSV input according to the given format and returns the content as a list of - * {@link CSVRecord CSVRecords}. - * - *

- * The returned content starts at the current parse-position in the stream. - *

- * - * @return list of {@link CSVRecord CSVRecords}, may be empty - * @throws IOException - * on parse error or input read-failure - */ - public List getRecords() throws IOException { - CSVRecord rec; - final List records = new ArrayList<>(); - while ((rec = this.nextRecord()) != null) { - records.add(rec); - } - return records; - } - - /** - * Gets whether this parser is closed. - * - * @return whether this parser is closed. - */ - public boolean isClosed() { - return this.lexer.isClosed(); - } - - /** - * Returns an iterator on the records. - * - *

- * An {@link IOException} caught during the iteration are re-thrown as an - * {@link IllegalStateException}. - *

- *

- * If the parser is closed a call to {@link Iterator#next()} will throw a - * {@link NoSuchElementException}. - *

- */ - @Override - public Iterator iterator() { - return csvRecordIterator; - } - - /** - * Parses the next record from the current point in the stream. - * - * @return the record as an array of values, or {@code null} if the end of the stream has been reached - * @throws IOException - * on parse error or input read-failure - */ - CSVRecord nextRecord() throws IOException { - CSVRecord result = null; - this.recordList.clear(); - StringBuilder sb = null; - final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset; - do { - this.reusableToken.reset(); - this.lexer.nextToken(this.reusableToken); - switch (this.reusableToken.type) { - case TOKEN: - this.addRecordValue(false); - break; - case EORECORD: - this.addRecordValue(true); - break; - case EOF: - if (this.reusableToken.isReady) { - this.addRecordValue(true); - } - break; - case INVALID: - throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence"); - case COMMENT: // Ignored currently - if (sb == null) { // first comment for this record - sb = new StringBuilder(); - } else { - sb.append(Constants.LF); - } - sb.append(this.reusableToken.content); - this.reusableToken.type = TOKEN; // Read another token - break; - default: - throw new IllegalStateException("Unexpected Token type: " + this.reusableToken.type); - } - } while (this.reusableToken.type == TOKEN); - - if (!this.recordList.isEmpty()) { - this.recordNumber++; - final String comment = sb == null ? 
null : sb.toString(); - result = new CSVRecord(this, this.recordList.toArray(new String[this.recordList.size()]), - comment, this.recordNumber, startCharPosition); - } - return result; - } - -} diff --git a/src/test/resources/org/apache/commons/csv/CSVPrinter.java b/src/test/resources/org/apache/commons/csv/CSVPrinter.java deleted file mode 100644 index c7377101..00000000 --- a/src/test/resources/org/apache/commons/csv/CSVPrinter.java +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.SP; - -import java.io.Closeable; -import java.io.Flushable; -import java.io.IOException; -import java.sql.Clob; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.Objects; - -/** - * Prints values in a {@link CSVFormat CSV format}. - * - *

Values can be appended to the output by calling the {@link #print(Object)} method. - * Values are printed according to {@link String#valueOf(Object)}. - * To complete a record the {@link #println()} method has to be called. - * Comments can be appended by calling {@link #printComment(String)}. - * However a comment will only be written to the output if the {@link CSVFormat} supports comments. - *

- * - *

The printer also supports appending a complete record at once by calling {@link #printRecord(Object...)} - * or {@link #printRecord(Iterable)}. - * Furthermore {@link #printRecords(Object...)}, {@link #printRecords(Iterable)} and {@link #printRecords(ResultSet)} - * methods can be used to print several records at once. - *

- * - *

Example:

- * - *
- * try (CSVPrinter printer = new CSVPrinter(new FileWriter("csv.txt"), CSVFormat.EXCEL)) {
- *     printer.printRecord("id", "userName", "firstName", "lastName", "birthday");
- *     printer.printRecord(1, "john73", "John", "Doe", LocalDate.of(1973, 9, 15));
- *     printer.println();
- *     printer.printRecord(2, "mary", "Mary", "Meyer", LocalDate.of(1985, 3, 29));
- * } catch (IOException ex) {
- *     ex.printStackTrace();
- * }
- * 
- * - *

This code will write the following to csv.txt:

- *
- * id,userName,firstName,lastName,birthday
- * 1,john73,John,Doe,1973-09-15
- *
- * 2,mary,Mary,Meyer,1985-03-29
- * 
- */ -public final class CSVPrinter implements Flushable, Closeable { - - /** The place that the values get written. */ - private final Appendable out; - private final CSVFormat format; - - /** True if we just began a new record. */ - private boolean newRecord = true; - - /** - * Creates a printer that will print values to the given stream following the CSVFormat. - *

- * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation - * and escaping with a different character) are not supported. - *

- * - * @param out - * stream to which to print. Must not be null. - * @param format - * the CSV format. Must not be null. - * @throws IOException - * thrown if the optional header cannot be printed. - * @throws IllegalArgumentException - * thrown if the parameters of the format are inconsistent or if either out or format are null. - */ - public CSVPrinter(final Appendable out, final CSVFormat format) throws IOException { - Objects.requireNonNull(out, "out"); - Objects.requireNonNull(format, "format"); - - this.out = out; - this.format = format; - // TODO: Is it a good idea to do this here instead of on the first call to a print method? - // It seems a pain to have to track whether the header has already been printed or not. - if (format.getHeaderComments() != null) { - for (final String line : format.getHeaderComments()) { - if (line != null) { - this.printComment(line); - } - } - } - if (format.getHeader() != null && !format.getSkipHeaderRecord()) { - this.printRecord((Object[]) format.getHeader()); - } - } - - // ====================================================== - // printing implementation - // ====================================================== - - @Override - public void close() throws IOException { - close(false); - } - - /** - * Closes the underlying stream with an optional flush first. - * @param flush whether to flush before the actual close. - * - * @throws IOException - * If an I/O error occurs - * @since 1.6 - */ - public void close(final boolean flush) throws IOException { - if (flush || format.getAutoFlush()) { - flush(); - } - if (out instanceof Closeable) { - ((Closeable) out).close(); - } - } - - /** - * Flushes the underlying stream. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void flush() throws IOException { - if (out instanceof Flushable) { - ((Flushable) out).flush(); - } - } - - /** - * Gets the target Appendable. - * - * @return the target Appendable. 
- */ - public Appendable getOut() { - return this.out; - } - - /** - * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. - * - * @param value - * value to be output. - * @throws IOException - * If an I/O error occurs - */ - public void print(final Object value) throws IOException { - format.print(value, out, newRecord); - newRecord = false; - } - - /** - * Prints a comment on a new line among the delimiter separated values. - * - *

- * Comments will always begin on a new line and occupy at least one full line. The character specified to start - * comments and a space will be inserted at the beginning of each new line in the comment. - *

- * - *

- * If comments are disabled in the current CSV format this method does nothing. - *

- * - *

This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()} - * to start a new line of the comment. Note that this might produce unexpected results for formats that do not use - * line breaks as record separator.

- * - * @param comment - * the comment to output - * @throws IOException - * If an I/O error occurs - */ - public void printComment(final String comment) throws IOException { - if (!format.isCommentMarkerSet()) { - return; - } - if (!newRecord) { - println(); - } - out.append(format.getCommentMarker().charValue()); - out.append(SP); - final int commentLength = comment.length(); - for (int i = 0; i < commentLength; i++) { - final char c = comment.charAt(i); - switch (c) { - case CR: - if (i + 1 < commentLength && comment.charAt(i + 1) == LF) { - i++; - } - //$FALL-THROUGH$ break intentionally excluded. - case LF: - println(); - out.append(format.getCommentMarker().charValue()); - out.append(SP); - break; - default: - out.append(c); - break; - } - } - println(); - } - - /** - * Outputs the record separator. - * - * @throws IOException - * If an I/O error occurs - */ - public void println() throws IOException { - format.println(out); - newRecord = true; - } - - /** - * Prints the given values a single record of delimiter separated values followed by the record separator. - * - *

- * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println()}. - *

- * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - */ - public void printRecord(final Iterable values) throws IOException { - for (final Object value : values) { - print(value); - } - println(); - } - - /** - * Prints the given values a single record of delimiter separated values followed by the record separator. - * - *

- * The values will be quoted if needed. Quotes and newline characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println()}. - *

- * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - */ - public void printRecord(final Object... values) throws IOException { - format.printRecord(out, values); - newRecord = true; - } - - /** - * Prints all the objects in the given collection handling nested collections/arrays as records. - * - *

- * If the given collection only contains simple objects, this method will print a single record like - * {@link #printRecord(Iterable)}. If the given collection contains nested collections/arrays, those nested elements - * will each be printed as records using {@link #printRecord(Object...)}. - *

- * - *

- * Given the following data structure: - *

- * - *
-     * 
-     * List<String[]> data = ...
-     * data.add(new String[]{ "A", "B", "C" });
-     * data.add(new String[]{ "1", "2", "3" });
-     * data.add(new String[]{ "A1", "B2", "C3" });
-     * 
-     * 
- * - *

- * Calling this method will print: - *

- * - *
-     * 
-     * A, B, C
-     * 1, 2, 3
-     * A1, B2, C3
-     * 
-     * 
- * - * @param values - * the values to print. - * @throws IOException - * If an I/O error occurs - */ - public void printRecords(final Iterable values) throws IOException { - for (final Object value : values) { - if (value instanceof Object[]) { - this.printRecord((Object[]) value); - } else if (value instanceof Iterable) { - this.printRecord((Iterable) value); - } else { - this.printRecord(value); - } - } - } - - /** - * Prints all the objects in the given array handling nested collections/arrays as records. - * - *

- * If the given array only contains simple objects, this method will print a single record like - * {@link #printRecord(Object...)}. If the given array contains nested collections/arrays, those nested - * elements will each be printed as records using {@link #printRecord(Object...)}. - *

- * - *

- * Given the following data structure: - *

- * - *
-     * 
-     * String[][] data = new String[3][];
-     * data[0] = new String[]{ "A", "B", "C" };
-     * data[1] = new String[]{ "1", "2", "3" };
-     * data[2] = new String[]{ "A1", "B2", "C3" };
-     * 
-     * 
- * - *

- * Calling this method will print: - *

- * - *
-     * 
-     * A, B, C
-     * 1, 2, 3
-     * A1, B2, C3
-     * 
-     * 
- * - * @param values - * the values to print. - * @throws IOException - * If an I/O error occurs - */ - public void printRecords(final Object... values) throws IOException { - printRecords(Arrays.asList(values)); - } - - /** - * Prints all the objects in the given JDBC result set. - * - * @param resultSet - * result set the values to print. - * @throws IOException - * If an I/O error occurs - * @throws SQLException - * if a database access error occurs - */ - public void printRecords(final ResultSet resultSet) throws SQLException, IOException { - final int columnCount = resultSet.getMetaData().getColumnCount(); - while (resultSet.next()) { - for (int i = 1; i <= columnCount; i++) { - final Object object = resultSet.getObject(i); - // TODO Who manages the Clob? The JDBC driver or must we close it? Is it driver-dependent? - print(object instanceof Clob ? ((Clob) object).getCharacterStream() : object); - } - println(); - } - } -} diff --git a/src/test/resources/org/apache/commons/csv/CSVRecord.java b/src/test/resources/org/apache/commons/csv/CSVRecord.java deleted file mode 100644 index 81d1f2b6..00000000 --- a/src/test/resources/org/apache/commons/csv/CSVRecord.java +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import java.io.Serializable; -import java.util.Arrays; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Objects; - -/** - * A CSV record parsed from a CSV file. - * - *

- * Note: Support for {@link Serializable} is scheduled to be removed in version 2.0. - * In version 1.8 the mapping between the column header and the column index was - * removed from the serialised state. The class maintains serialization compatibility - * with versions pre-1.8 for the record values; these must be accessed by index - * following deserialization. There will be loss of any functionality linked to the header - * mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa. - *

- */ -public final class CSVRecord implements Serializable, Iterable { - - private static final String[] EMPTY_STRING_ARRAY = new String[0]; - - private static final long serialVersionUID = 1L; - - private final long characterPosition; - - /** The accumulated comments (if any) */ - private final String comment; - - /** The record number. */ - private final long recordNumber; - - /** The values of the record */ - private final String[] values; - - /** The parser that originates this record. This is not serialized. */ - private final transient CSVParser parser; - - CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber, - final long characterPosition) { - this.recordNumber = recordNumber; - this.values = values != null ? values : EMPTY_STRING_ARRAY; - this.parser = parser; - this.comment = comment; - this.characterPosition = characterPosition; - } - - /** - * Returns a value by {@link Enum}. - * - * @param e - * an enum - * @return the String at the given enum String - */ - public String get(final Enum e) { - return get(Objects.toString(e, null)); - } - - /** - * Returns a value by index. - * - * @param i - * a column index (0-based) - * @return the String at the given index - */ - public String get(final int i) { - return values[i]; - } - - /** - * Returns a value by name. - * - *

- * Note: This requires a field mapping obtained from the original parser. - * A check using {@link #isMapped(String)} should be used to determine if a - * mapping exists from the provided {@code name} to a field index. In this case an - * exception will only be thrown if the record does not contain a field corresponding - * to the mapping, that is the record length is not consistent with the mapping size. - *

- * - * @param name - * the name of the column to be retrieved. - * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}. - * @throws IllegalStateException - * if no header mapping was provided - * @throws IllegalArgumentException - * if {@code name} is not mapped or if the record is inconsistent - * @see #isMapped(String) - * @see #isConsistent() - * @see #getParser() - * @see CSVFormat#withNullString(String) - */ - public String get(final String name) { - final Map headerMap = getHeaderMapRaw(); - if (headerMap == null) { - throw new IllegalStateException( - "No header mapping was specified, the record values can't be accessed by name"); - } - final Integer index = headerMap.get(name); - if (index == null) { - throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, - headerMap.keySet())); - } - try { - return values[index.intValue()]; - } catch (final ArrayIndexOutOfBoundsException e) { - throw new IllegalArgumentException(String.format( - "Index for header '%s' is %d but CSVRecord only has %d values!", name, index, - Integer.valueOf(values.length))); - } - } - - /** - * Returns the start position of this record as a character position in the source stream. This may or may not - * correspond to the byte position depending on the character set. - * - * @return the position of this record in the source stream. - */ - public long getCharacterPosition() { - return characterPosition; - } - - /** - * Returns the comment for this record, if any. - * Note that comments are attached to the following record. - * If there is no following record (i.e. the comment is at EOF) - * the comment will be ignored. - * - * @return the comment for this record, or null if no comment for this record is available. - */ - public String getComment() { - return comment; - } - - private Map getHeaderMapRaw() { - return parser == null ? null : parser.getHeaderMapRaw(); - } - - /** - * Returns the parser. - * - *

- * Note: The parser is not part of the serialized state of the record. A null check - * should be used when the record may have originated from a serialized form. - *

- * - * @return the parser. - * @since 1.7 - */ - public CSVParser getParser() { - return parser; - } - - /** - * Returns the number of this record in the parsed CSV file. - * - *

- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to - * the current line number of the parser that created this record. - *

- * - * @return the number of this record. - * @see CSVParser#getCurrentLineNumber() - */ - public long getRecordNumber() { - return recordNumber; - } - - /** - * Checks whether this record has a comment, false otherwise. - * Note that comments are attached to the following record. - * If there is no following record (i.e. the comment is at EOF) - * the comment will be ignored. - * - * @return true if this record has a comment, false otherwise - * @since 1.3 - */ - public boolean hasComment() { - return comment != null; - } - - /** - * Tells whether the record size matches the header size. - * - *

- * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this - * test but still produce parsable files. - *

- * - * @return true of this record is valid, false if not - */ - public boolean isConsistent() { - final Map headerMap = getHeaderMapRaw(); - return headerMap == null || headerMap.size() == values.length; - } - - /** - * Checks whether a given column is mapped, i.e. its name has been defined to the parser. - * - * @param name - * the name of the column to be retrieved. - * @return whether a given column is mapped. - */ - public boolean isMapped(final String name) { - final Map headerMap = getHeaderMapRaw(); - return headerMap != null && headerMap.containsKey(name); - } - - /** - * Checks whether a column with given index has a value. - * - * @param index - * a column index (0-based) - * @return whether a column with given index has a value - */ - public boolean isSet(final int index) { - return 0 <= index && index < values.length; - } - - /** - * Checks whether a given columns is mapped and has a value. - * - * @param name - * the name of the column to be retrieved. - * @return whether a given columns is mapped and has a value - */ - public boolean isSet(final String name) { - return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length; - } - - /** - * Returns an iterator over the values of this record. - * - * @return an iterator over the values of this record. - */ - @Override - public Iterator iterator() { - return toList().iterator(); - } - - /** - * Puts all values of this record into the given Map. - * - * @param map - * The Map to populate. - * @return the given map. - * @since 1.9.0 - */ - public > M putIn(final M map) { - if (getHeaderMapRaw() == null) { - return map; - } - for (final Entry entry : getHeaderMapRaw().entrySet()) { - final int col = entry.getValue().intValue(); - if (col < values.length) { - map.put(entry.getKey(), values[col]); - } - } - return map; - } - - /** - * Returns the number of values in this record. - * - * @return the number of values. 
- */ - public int size() { - return values.length; - } - - /** - * Converts the values to a List. - * - * TODO: Maybe make this public? - * - * @return a new List - */ - private List toList() { - return Arrays.asList(values); - } - - /** - * Copies this record into a new Map of header name to record value. - * - * @return A new Map. The map is empty if the record has no headers. - */ - public Map toMap() { - return putIn(new LinkedHashMap(values.length)); - } - - /** - * Returns a string representation of the contents of this record. The result is constructed by comment, mapping, - * recordNumber and by passing the internal values array to {@link Arrays#toString(Object[])}. - * - * @return a String representation of this record. - */ - @Override - public String toString() { - return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" + - Arrays.toString(values) + "]"; - } - - String[] values() { - return values; - } - -} diff --git a/src/test/resources/org/apache/commons/csv/Constants.java b/src/test/resources/org/apache/commons/csv/Constants.java deleted file mode 100644 index b7dc770a..00000000 --- a/src/test/resources/org/apache/commons/csv/Constants.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -/** - * Constants for this package. - */ -final class Constants { - - static final char BACKSLASH = '\\'; - - static final char BACKSPACE = '\b'; - - static final char COMMA = ','; - - /** - * Starts a comment, the remainder of the line is the comment. - */ - static final char COMMENT = '#'; - - static final char CR = '\r'; - - /** RFC 4180 defines line breaks as CRLF */ - static final String CRLF = "\r\n"; - - static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"'); - - static final String EMPTY = ""; - - /** The end of stream symbol */ - static final int END_OF_STREAM = -1; - - static final char FF = '\f'; - - static final char LF = '\n'; - - /** - * Unicode line separator. - */ - static final String LINE_SEPARATOR = "\u2028"; - - /** - * Unicode next line. - */ - static final String NEXT_LINE = "\u0085"; - - /** - * Unicode paragraph separator. - */ - static final String PARAGRAPH_SEPARATOR = "\u2029"; - - static final char PIPE = '|'; - - /** ASCII record separator */ - static final char RS = 30; - - static final char SP = ' '; - - static final char TAB = '\t'; - - /** Undefined state for the lookahead char */ - static final int UNDEFINED = -2; - - /** ASCII unit separator */ - static final char US = 31; - -} diff --git a/src/test/resources/org/apache/commons/csv/ExtendedBufferedReader.java b/src/test/resources/org/apache/commons/csv/ExtendedBufferedReader.java deleted file mode 100644 index b9ca79df..00000000 --- a/src/test/resources/org/apache/commons/csv/ExtendedBufferedReader.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.END_OF_STREAM; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.UNDEFINED; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.Reader; - -/** - * A special buffered reader which supports sophisticated read access. - *

- * In particular the reader supports a look-ahead option, which allows you to see the next char returned by - * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}. - *

- */ -final class ExtendedBufferedReader extends BufferedReader { - - /** The last char returned */ - private int lastChar = UNDEFINED; - - /** The count of EOLs (CR/LF/CRLF) seen so far */ - private long eolCounter; - - /** The position, which is number of characters read so far */ - private long position; - - private boolean closed; - - /** - * Created extended buffered reader using default buffer-size - */ - ExtendedBufferedReader(final Reader reader) { - super(reader); - } - - /** - * Closes the stream. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void close() throws IOException { - // Set ivars before calling super close() in case close() throws an IOException. - closed = true; - lastChar = END_OF_STREAM; - super.close(); - } - - /** - * Returns the current line number - * - * @return the current line number - */ - long getCurrentLineNumber() { - // Check if we are at EOL or EOF or just starting - if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) { - return eolCounter; // counter is accurate - } - return eolCounter + 1; // Allow for counter being incremented only at EOL - } - - /** - * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by - * any of the read methods. This will not include a character read using the {@link #lookAhead()} method. If no - * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached - * on the last read then this will return {@link Constants#END_OF_STREAM}. - * - * @return the last character that was read - */ - int getLastChar() { - return lastChar; - } - - /** - * Gets the character position in the reader. 
- * - * @return the current position in the reader (counting characters, not bytes since this is a Reader) - */ - long getPosition() { - return this.position; - } - - public boolean isClosed() { - return closed; - } - - /** - * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will - * still return this value. Does not affect line number or last character. - * - * @return the next character - * - * @throws IOException - * if there is an error in reading - */ - int lookAhead() throws IOException { - super.mark(1); - final int c = super.read(); - super.reset(); - - return c; - } - - @Override - public int read() throws IOException { - final int current = super.read(); - if (current == CR || current == LF && lastChar != CR) { - eolCounter++; - } - lastChar = current; - this.position++; - return lastChar; - } - - @Override - public int read(final char[] buf, final int offset, final int length) throws IOException { - if (length == 0) { - return 0; - } - - final int len = super.read(buf, offset, length); - - if (len > 0) { - - for (int i = offset; i < offset + len; i++) { - final char ch = buf[i]; - if (ch == LF) { - if (CR != (i > 0 ? buf[i - 1] : lastChar)) { - eolCounter++; - } - } else if (ch == CR) { - eolCounter++; - } - } - - lastChar = buf[offset + len - 1]; - - } else if (len == -1) { - lastChar = END_OF_STREAM; - } - - position += len; - return len; - } - - /** - * Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called - * when processing a comment, otherwise information can be lost. - *

- * Increments {@link #eolCounter} - *

- * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF - * - * @return the line that was read, or null if reached EOF. - */ - @Override - public String readLine() throws IOException { - final String line = super.readLine(); - - if (line != null) { - lastChar = LF; // needed for detecting start of line - eolCounter++; - } else { - lastChar = END_OF_STREAM; - } - - return line; - } - -} diff --git a/src/test/resources/org/apache/commons/csv/IOUtils.java b/src/test/resources/org/apache/commons/csv/IOUtils.java deleted file mode 100644 index 1771d4dc..00000000 --- a/src/test/resources/org/apache/commons/csv/IOUtils.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.csv; - -import java.io.IOException; -import java.io.Reader; -import java.io.Writer; -import java.nio.CharBuffer; - -/** Copied from Apache Commons IO. */ -class IOUtils { - - /** - *

- * Copied from Apache Commons IO. - *

- * The default buffer size ({@value}). - */ - static final int DEFAULT_BUFFER_SIZE = 1024 * 4; - - /** - *

- * Copied from Apache Commons IO. - *

- * Represents the end-of-file (or stream). - * @since 2.5 (made public) - */ - private static final int EOF = -1; - - /** - * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}. - *

- * This method buffers the input internally, so there is no need to use a - * {@code BufferedReader}. - *

- * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. - * - * @param input the {@code Reader} to read from - * @param output the {@code Appendable} to append to - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.7 - */ - static long copy(final Reader input, final Appendable output) throws IOException { - return copy(input, output, CharBuffer.allocate(DEFAULT_BUFFER_SIZE)); - } - - /** - * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}. - *

- * This method uses the provided buffer, so there is no need to use a - * {@code BufferedReader}. - *

- * - * @param input the {@code Reader} to read from - * @param output the {@code Appendable} to write to - * @param buffer the buffer to be used for the copy - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.7 - */ - static long copy(final Reader input, final Appendable output, final CharBuffer buffer) throws IOException { - long count = 0; - int n; - while (EOF != (n = input.read(buffer))) { - buffer.flip(); - output.append(buffer, 0, n); - count += n; - } - return count; - } - - /** - *

- * Copied from Apache Commons IO. - *

- * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}. - *

- * This method buffers the input internally, so there is no need to use a - * {@code BufferedReader}. - *

- * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. - * - * @param input the {@code Reader} to read from - * @param output the {@code Writer} to write to - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 1.3 - */ - static long copyLarge(final Reader input, final Writer output) throws IOException { - return copyLarge(input, output, new char[DEFAULT_BUFFER_SIZE]); - } - - /** - *

- * Copied from Apache Commons IO. - *

- * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}. - *

- * This method uses the provided buffer, so there is no need to use a - * {@code BufferedReader}. - *

- * - * @param input the {@code Reader} to read from - * @param output the {@code Writer} to write to - * @param buffer the buffer to be used for the copy - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.2 - */ - static long copyLarge(final Reader input, final Writer output, final char[] buffer) throws IOException { - long count = 0; - int n; - while (EOF != (n = input.read(buffer))) { - output.write(buffer, 0, n); - count += n; - } - return count; - } - -} diff --git a/src/test/resources/org/apache/commons/csv/Lexer.java b/src/test/resources/org/apache/commons/csv/Lexer.java deleted file mode 100644 index 2795ca29..00000000 --- a/src/test/resources/org/apache/commons/csv/Lexer.java +++ /dev/null @@ -1,461 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.BACKSPACE; -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.END_OF_STREAM; -import static org.apache.commons.csv.Constants.FF; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.TAB; -import static org.apache.commons.csv.Constants.UNDEFINED; -import static org.apache.commons.csv.Token.Type.COMMENT; -import static org.apache.commons.csv.Token.Type.EOF; -import static org.apache.commons.csv.Token.Type.EORECORD; -import static org.apache.commons.csv.Token.Type.INVALID; -import static org.apache.commons.csv.Token.Type.TOKEN; - -import java.io.Closeable; -import java.io.IOException; - -/** - * Lexical analyzer. - */ -final class Lexer implements Closeable { - - private static final String CR_STRING = Character.toString(CR); - private static final String LF_STRING = Character.toString(LF); - - /** - * Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it - * won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two - * chars (using surrogates) and thus there should never be a collision with a real text char. 
- */ - private static final char DISABLED = '\ufffe'; - - private final char delimiter; - private final char escape; - private final char quoteChar; - private final char commentStart; - - private final boolean ignoreSurroundingSpaces; - private final boolean ignoreEmptyLines; - - /** The input stream */ - private final ExtendedBufferedReader reader; - private String firstEol; - - Lexer(final CSVFormat format, final ExtendedBufferedReader reader) { - this.reader = reader; - this.delimiter = format.getDelimiter(); - this.escape = mapNullToDisabled(format.getEscapeCharacter()); - this.quoteChar = mapNullToDisabled(format.getQuoteCharacter()); - this.commentStart = mapNullToDisabled(format.getCommentMarker()); - this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces(); - this.ignoreEmptyLines = format.getIgnoreEmptyLines(); - } - - /** - * Closes resources. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void close() throws IOException { - reader.close(); - } - - /** - * Returns the current character position - * - * @return the current character position - */ - long getCharacterPosition() { - return reader.getPosition(); - } - - /** - * Returns the current line number - * - * @return the current line number - */ - long getCurrentLineNumber() { - return reader.getCurrentLineNumber(); - } - - String getFirstEol(){ - return firstEol; - } - - boolean isClosed() { - return reader.isClosed(); - } - - boolean isCommentStart(final int ch) { - return ch == commentStart; - } - - boolean isDelimiter(final int ch) { - return ch == delimiter; - } - - /** - * @return true if the given character indicates end of file - */ - boolean isEndOfFile(final int ch) { - return ch == END_OF_STREAM; - } - - boolean isEscape(final int ch) { - return ch == escape; - } - - private boolean isMetaChar(final int ch) { - return ch == delimiter || - ch == escape || - ch == quoteChar || - ch == commentStart; - } - - boolean isQuoteChar(final int ch) { - return 
ch == quoteChar; - } - - /** - * Checks if the current character represents the start of a line: a CR, LF or is at the start of the file. - * - * @param ch the character to check - * @return true if the character is at the start of a line. - */ - boolean isStartOfLine(final int ch) { - return ch == LF || ch == CR || ch == UNDEFINED; - } - - /** - * @return true if the given char is a whitespace character - */ - boolean isWhitespace(final int ch) { - return !isDelimiter(ch) && Character.isWhitespace((char) ch); - } - - private char mapNullToDisabled(final Character c) { - return c == null ? DISABLED : c.charValue(); - } - - /** - * Returns the next token. - *

- * A token corresponds to a term, a record change or an end-of-file indicator. - *

- * - * @param token - * an existing Token object to reuse. The caller is responsible to initialize the Token. - * @return the next token found - * @throws java.io.IOException - * on stream access error - */ - Token nextToken(final Token token) throws IOException { - - // get the last read char (required for empty line detection) - int lastChar = reader.getLastChar(); - - // read the next char and set eol - int c = reader.read(); - /* - * Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF - * - they are equivalent here. - */ - boolean eol = readEndOfLine(c); - - // empty line detection: eol AND (last char was EOL or beginning) - if (ignoreEmptyLines) { - while (eol && isStartOfLine(lastChar)) { - // go on char ahead ... - lastChar = c; - c = reader.read(); - eol = readEndOfLine(c); - // reached end of file without any content (empty line at the end) - if (isEndOfFile(c)) { - token.type = EOF; - // don't set token.isReady here because no content - return token; - } - } - } - - // did we reach eof during the last iteration already ? 
EOF - if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) { - token.type = EOF; - // don't set token.isReady here because no content - return token; - } - - if (isStartOfLine(lastChar) && isCommentStart(c)) { - final String line = reader.readLine(); - if (line == null) { - token.type = EOF; - // don't set token.isReady here because no content - return token; - } - final String comment = line.trim(); - token.content.append(comment); - token.type = COMMENT; - return token; - } - - // important: make sure a new char gets consumed in each iteration - while (token.type == INVALID) { - // ignore whitespaces at beginning of a token - if (ignoreSurroundingSpaces) { - while (isWhitespace(c) && !eol) { - c = reader.read(); - eol = readEndOfLine(c); - } - } - - // ok, start of token reached: encapsulated, or token - if (isDelimiter(c)) { - // empty token return TOKEN("") - token.type = TOKEN; - } else if (eol) { - // empty token return EORECORD("") - // noop: token.content.append(""); - token.type = EORECORD; - } else if (isQuoteChar(c)) { - // consume encapsulated token - parseEncapsulatedToken(token); - } else if (isEndOfFile(c)) { - // end of file return EOF() - // noop: token.content.append(""); - token.type = EOF; - token.isReady = true; // there is data at EOF - } else { - // next token must be a simple token - // add removed blanks when not ignoring whitespace chars... - parseSimpleToken(token, c); - } - } - return token; - } - - /** - * Parses an encapsulated token. - *

- * Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included - * in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after - * an encapsulated token are ignored. The token is finished when one of the following conditions become true: - *

    - *
  • an unescaped encapsulator has been reached, and is followed by optional whitespace then:
  • - *
      - *
    • delimiter (TOKEN)
    • - *
    • end of line (EORECORD)
    • - *
    - *
  • end of stream has been reached (EOF)
- * - * @param token - * the current token - * @return a valid token object - * @throws IOException - * on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL - */ - private Token parseEncapsulatedToken(final Token token) throws IOException { - // save current line number in case needed for IOE - final long startLineNumber = getCurrentLineNumber(); - int c; - while (true) { - c = reader.read(); - - if (isEscape(c)) { - final int unescaped = readEscape(); - if (unescaped == END_OF_STREAM) { // unexpected char after escape - token.content.append((char) c).append((char) reader.getLastChar()); - } else { - token.content.append((char) unescaped); - } - } else if (isQuoteChar(c)) { - if (isQuoteChar(reader.lookAhead())) { - // double or escaped encapsulator -> add single encapsulator to token - c = reader.read(); - token.content.append((char) c); - } else { - // token finish mark (encapsulator) reached: ignore whitespace till delimiter - while (true) { - c = reader.read(); - if (isDelimiter(c)) { - token.type = TOKEN; - return token; - } else if (isEndOfFile(c)) { - token.type = EOF; - token.isReady = true; // There is data at EOF - return token; - } else if (readEndOfLine(c)) { - token.type = EORECORD; - return token; - } else if (!isWhitespace(c)) { - // error invalid char between token and next delimiter - throw new IOException("(line " + getCurrentLineNumber() + - ") invalid char between encapsulated token and delimiter"); - } - } - } - } else if (isEndOfFile(c)) { - // error condition (end of file before end of token) - throw new IOException("(startline " + startLineNumber + - ") EOF reached before encapsulated token finished"); - } else { - // consume character - token.content.append((char) c); - } - } - } - - /** - * Parses a simple token. - *

- * Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped - * delimiters (as \, or \;). The token is finished when one of the following conditions become true: - *

    - *
  • end of line has been reached (EORECORD)
  • - *
  • end of stream has been reached (EOF)
  • - *
  • an unescaped delimiter has been reached (TOKEN)
  • - *
- * - * @param token - * the current token - * @param ch - * the current character - * @return the filled token - * @throws IOException - * on stream access error - */ - private Token parseSimpleToken(final Token token, int ch) throws IOException { - // Faster to use while(true)+break than while(token.type == INVALID) - while (true) { - if (readEndOfLine(ch)) { - token.type = EORECORD; - break; - } else if (isEndOfFile(ch)) { - token.type = EOF; - token.isReady = true; // There is data at EOF - break; - } else if (isDelimiter(ch)) { - token.type = TOKEN; - break; - } else if (isEscape(ch)) { - final int unescaped = readEscape(); - if (unescaped == END_OF_STREAM) { // unexpected char after escape - token.content.append((char) ch).append((char) reader.getLastChar()); - } else { - token.content.append((char) unescaped); - } - ch = reader.read(); // continue - } else { - token.content.append((char) ch); - ch = reader.read(); // continue - } - } - - if (ignoreSurroundingSpaces) { - trimTrailingSpaces(token.content); - } - - return token; - } - - /** - * Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character... - * - * @return true if the given or next character is a line-terminator - */ - boolean readEndOfLine(int ch) throws IOException { - // check if we have \r\n... - if (ch == CR && reader.lookAhead() == LF) { - // note: does not change ch outside of this method! - ch = reader.read(); - // Save the EOL state - if (firstEol == null) { - this.firstEol = Constants.CRLF; - } - } - // save EOL state here. - if (firstEol == null) { - if (ch == LF) { - this.firstEol = LF_STRING; - } else if (ch == CR) { - this.firstEol = CR_STRING; - } - } - - return ch == LF || ch == CR; - } - - // TODO escape handling needs more work - /** - * Handle an escape sequence. - * The current character must be the escape character. - * On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()} - * on the input stream. 
- * - * @return the unescaped character (as an int) or {@link Constants#END_OF_STREAM} if char following the escape is - * invalid. - * @throws IOException if there is a problem reading the stream or the end of stream is detected: - * the escape character is not allowed at end of stream - */ - int readEscape() throws IOException { - // the escape char has just been read (normally a backslash) - final int ch = reader.read(); - switch (ch) { - case 'r': - return CR; - case 'n': - return LF; - case 't': - return TAB; - case 'b': - return BACKSPACE; - case 'f': - return FF; - case CR: - case LF: - case FF: // TODO is this correct? - case TAB: // TODO is this correct? Do tabs need to be escaped? - case BACKSPACE: // TODO is this correct? - return ch; - case END_OF_STREAM: - throw new IOException("EOF whilst processing escape sequence"); - default: - // Now check for meta-characters - if (isMetaChar(ch)) { - return ch; - } - // indicate unexpected char - available from in.getLastChar() - return END_OF_STREAM; - } - } - - void trimTrailingSpaces(final StringBuilder buffer) { - int length = buffer.length(); - while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) { - length = length - 1; - } - if (length != buffer.length()) { - buffer.setLength(length); - } - } -} diff --git a/src/test/resources/org/apache/commons/csv/QuoteMode.java b/src/test/resources/org/apache/commons/csv/QuoteMode.java deleted file mode 100644 index 272deb73..00000000 --- a/src/test/resources/org/apache/commons/csv/QuoteMode.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.csv; - -/** - * Defines quoting behavior when printing. - */ -public enum QuoteMode { - - /** - * Quotes all fields. - */ - ALL, - - /** - * Quotes all non-null fields. - */ - ALL_NON_NULL, - - /** - * Quotes fields which contain special characters such as a the field delimiter, quote character or any of the - * characters in the line separator string. - */ - MINIMAL, - - /** - * Quotes all non-numeric fields. - */ - NON_NUMERIC, - - /** - * Never quotes fields. When the delimiter occurs in data, the printer prefixes it with the escape character. If the - * escape character is not set, format validation throws an exception. - */ - NONE -} diff --git a/src/test/resources/org/apache/commons/csv/Token.java b/src/test/resources/org/apache/commons/csv/Token.java deleted file mode 100644 index dff7d018..00000000 --- a/src/test/resources/org/apache/commons/csv/Token.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Token.Type.INVALID; - -/** - * Internal token representation. - *

- * It is used as contract between the lexer and the parser. - */ -final class Token { - - enum Type { - /** Token has no valid content, i.e. is in its initialized state. */ - INVALID, - - /** Token with content, at beginning or in the middle of a line. */ - TOKEN, - - /** Token (which can have content) when the end of file is reached. */ - EOF, - - /** Token with content when the end of a line is reached. */ - EORECORD, - - /** Token is a comment line. */ - COMMENT - } - - /** length of the initial token (content-)buffer */ - private static final int INITIAL_TOKEN_LENGTH = 50; - - /** Token type */ - Token.Type type = INVALID; - - /** The content buffer. */ - final StringBuilder content = new StringBuilder(INITIAL_TOKEN_LENGTH); - - /** Token ready flag: indicates a valid token with content (ready for the parser). */ - boolean isReady; - - void reset() { - content.setLength(0); - type = INVALID; - isReady = false; - } - - /** - * Eases IDE debugging. - * - * @return a string helpful for debugging. - */ - @Override - public String toString() { - return type.name() + " [" + content.toString() + "]"; - } -} diff --git a/src/test/resources/org/apache/commons/csv/package-info.java b/src/test/resources/org/apache/commons/csv/package-info.java deleted file mode 100644 index 29e7fef6..00000000 --- a/src/test/resources/org/apache/commons/csv/package-info.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Apache Commons CSV Format Support. - * - *

CSV are widely used as interfaces to legacy systems or manual data-imports. - * CSV stands for "Comma Separated Values" (or sometimes "Character Separated - * Values"). The CSV data format is defined in - * RFC 4180 - * but many dialects exist.

- * - *

Common to all file dialects is its basic structure: The CSV data-format - * is record oriented, whereas each record starts on a new textual line. A - * record is build of a list of values. Keep in mind that not all records - * must have an equal number of values:

- *
- *       csv    := records*
- *       record := values*
- * 
- * - *

The following list contains the CSV aspects the Commons CSV parser supports:

- *
- *
Separators (for lines)
- *
The record separators are hardcoded and cannot be changed. The must be '\r', '\n' or '\r\n'.
- * - *
Delimiter (for values)
- *
The delimiter for values is freely configurable (default ',').
- * - *
Comments
- *
Some CSV-dialects support a simple comment syntax. A comment is a record - * which must start with a designated character (the commentStarter). A record - * of this kind is treated as comment and gets removed from the input (default none)
- * - *
Encapsulator
- *
Two encapsulator characters (default '"') are used to enclose -> complex values.
- * - *
Simple values
- *
A simple value consist of all characters (except the delimiter) until - * (but not including) the next delimiter or a record-terminator. Optionally - * all surrounding whitespaces of a simple value can be ignored (default: true).
- * - *
Complex values
- *
Complex values are encapsulated within a pair of the defined encapsulator characters. - * The encapsulator itself must be escaped or doubled when used inside complex values. - * Complex values preserve all kind of formatting (including newlines -> multiline-values)
- * - *
Empty line skipping
- *
Optionally empty lines in CSV files can be skipped. - * Otherwise, empty lines will return a record with a single empty value.
- *
- * - *

In addition to individually defined dialects, two predefined dialects (strict-csv, and excel-csv) - * can be set directly.

- * - *

Example usage:

- *
- * Reader in = new StringReader("a,b,c");
- * for (CSVRecord record : CSVFormat.DEFAULT.parse(in)) {
- *     for (String field : record) {
- *         System.out.print("\"" + field + "\", ");
- *     }
- *     System.out.println();
- * }
- * 
- */ - -package org.apache.commons.csv;