diff --git a/src/test/resources/org/apache/commons/csv/CSVFormat.java b/src/test/resources/org/apache/commons/csv/CSVFormat.java deleted file mode 100644 index c00f993f..00000000 --- a/src/test/resources/org/apache/commons/csv/CSVFormat.java +++ /dev/null @@ -1,2330 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.BACKSLASH; -import static org.apache.commons.csv.Constants.COMMA; -import static org.apache.commons.csv.Constants.COMMENT; -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.CRLF; -import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR; -import static org.apache.commons.csv.Constants.EMPTY; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.PIPE; -import static org.apache.commons.csv.Constants.SP; -import static org.apache.commons.csv.Constants.TAB; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.Reader; -import java.io.Serializable; -import java.io.StringWriter; -import java.io.Writer; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Path; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; - -/** - * Specifies the format of a CSV file and parses input. - * - *
- * You can use one of the predefined formats: - *
- * - *- * For example: - *
- * - *- * CSVParser parser = CSVFormat.EXCEL.parse(reader); - *- * - *
- * The {@link CSVParser} provides static methods to parse other input types, for example: - *
- * - *- * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL); - *- * - *
- * You can extend a format by calling the {@code with} methods. For example: - *
- * - *- * CSVFormat.EXCEL.withNullString("N/A").withIgnoreSurroundingSpaces(true); - *- * - *
- * To define the column names you want to use to access records, write: - *
- * - *- * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3"); - *- * - *
- * Calling {@link #withHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and - * assumes that your CSV source does not contain a first record that also defines column names. - * - * If it does, then you are overriding this metadata with your names and you should skip the first record by calling - * {@link #withSkipHeaderRecord(boolean)} with {@code true}. - *
- * - *- * You can use a format directly to parse a reader. For example, to parse an Excel file with columns header, write: - *
- * - *- * Reader in = ...; - * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3").parse(in); - *- * - *
- * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}. - *
- * - *- * If your source contains a header record, you can simplify your code and safely reference columns, by using - * {@link #withHeader(String...)} with no arguments: - *
- * - *- * CSVFormat.EXCEL.withHeader(); - *- * - *
- * This causes the parser to read the first record and use its values as column names. - * - * Then, call one of the {@link CSVRecord} get method that takes a String column name argument: - *
- * - *- * String value = record.get("Col1"); - *- * - *
- * This makes your code impervious to changes in column order in the CSV file. - *
- * - *- * This class is immutable. - *
- */ -public final class CSVFormat implements Serializable { - - /** - * Predefines formats. - * - * @since 1.2 - */ - public enum Predefined { - - /** - * @see CSVFormat#DEFAULT - */ - Default(CSVFormat.DEFAULT), - - /** - * @see CSVFormat#EXCEL - */ - Excel(CSVFormat.EXCEL), - - /** - * @see CSVFormat#INFORMIX_UNLOAD - * @since 1.3 - */ - InformixUnload(CSVFormat.INFORMIX_UNLOAD), - - /** - * @see CSVFormat#INFORMIX_UNLOAD_CSV - * @since 1.3 - */ - InformixUnloadCsv(CSVFormat.INFORMIX_UNLOAD_CSV), - - /** - * @see CSVFormat#MONGODB_CSV - * @since 1.7 - */ - MongoDBCsv(CSVFormat.MONGODB_CSV), - - /** - * @see CSVFormat#MONGODB_TSV - * @since 1.7 - */ - MongoDBTsv(CSVFormat.MONGODB_TSV), - - /** - * @see CSVFormat#MYSQL - */ - MySQL(CSVFormat.MYSQL), - - /** - * @see CSVFormat#ORACLE - */ - Oracle(CSVFormat.ORACLE), - - /** - * @see CSVFormat#POSTGRESQL_CSV - * @since 1.5 - */ - PostgreSQLCsv(CSVFormat.POSTGRESQL_CSV), - - /** - * @see CSVFormat#POSTGRESQL_CSV - */ - PostgreSQLText(CSVFormat.POSTGRESQL_TEXT), - - /** - * @see CSVFormat#RFC4180 - */ - RFC4180(CSVFormat.RFC4180), - - /** - * @see CSVFormat#TDF - */ - TDF(CSVFormat.TDF); - - private final CSVFormat format; - - Predefined(final CSVFormat format) { - this.format = format; - } - - /** - * Gets the format. - * - * @return the format. - */ - public CSVFormat getFormat() { - return format; - } - } - - /** - * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines. - * - *- * Settings are: - *
- *- * For example for parsing or generating a CSV file on a French system the following format will be used: - *
- * - *- * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';'); - *- * - *
- * Settings are: - *
- *- * Note: This is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean) - * withAllowMissingColumnNames(true)} and {@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}. - *
- * - * @see Predefined#Excel - */ - // @formatter:off - public static final CSVFormat EXCEL = DEFAULT - .withIgnoreEmptyLines(false) - .withAllowMissingColumnNames(); - // @formatter:on - - /** - * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation. - * - *- * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special - * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. - *
- * - *- * Settings are: - *
- *- * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special - * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. - *
- * - *- * Settings are: - *
- *- * Parsing is not supported yet. - *
- * - *- * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with - * {@code '"'}. A header line with field names is expected. - *
- * - *- * Settings are: - *
- *- * Parsing is not supported yet. - *
- * - *- * This is a tab-delimited format. Values are double quoted only if needed and special - * characters are escaped with {@code '"'}. A header line with field names is expected. - *
- * - *- * Settings are: - *
- *- * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special - * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. - *
- * - *- * Settings are: - *
- *- * This is a comma-delimited format with the system line separator character as the record separator.Values are - * double quoted when needed and special characters are escaped with {@code '"'}. The default NULL string is - * {@code ""}. Values are trimmed. - *
- * - *- * Settings are: - *
- *- * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special - * characters are escaped with {@code '"'}. The default NULL string is {@code ""}. - *
- * - *- * Settings are: - *
- *- * This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special - * characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}. - *
- * - *- * Settings are: - *
- *- * Settings are: - *
- *- * Settings are: - *
- *- * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized - * with null/false. - *
- * - * @param delimiter - * the char used for value separation, must not be a line break character - * @return a new CSV format. - * @throws IllegalArgumentException - * if the delimiter is a line break character - * - * @see #DEFAULT - * @see #RFC4180 - * @see #MYSQL - * @see #EXCEL - * @see #TDF - */ - public static CSVFormat newFormat(final char delimiter) { - return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false, - false, false, false, false, true); - } - - /** - * Gets one of the predefined formats from {@link CSVFormat.Predefined}. - * - * @param format - * name - * @return one of the predefined formats - * @since 1.2 - */ - public static CSVFormat valueOf(final String format) { - return CSVFormat.Predefined.valueOf(format).getFormat(); - } - - private final boolean allowDuplicateHeaderNames; - - private final boolean allowMissingColumnNames; - - private final boolean autoFlush; - - private final Character commentMarker; // null if commenting is disabled - - private final char delimiter; - - private final Character escapeCharacter; // null if escaping is disabled - - private final String[] header; // array of header column names - - private final String[] headerComments; // array of header comment lines - - private final boolean ignoreEmptyLines; - - private final boolean ignoreHeaderCase; // should ignore header names case - - private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? - - private final String nullString; // the string to be used for null values - - private final Character quoteCharacter; // null if quoting is disabled - - private final String quotedNullString; - - private final QuoteMode quoteMode; - - private final String recordSeparator; // for outputs - - private final boolean skipHeaderRecord; - - private final boolean trailingDelimiter; - - private final boolean trim; - - /** - * Creates a customized CSV format. - * - * @param delimiter - * the char used for value separation, must not be a line break character - * @param quoteChar - * the Character used as value encapsulation marker, may be {@code null} to disable - * @param quoteMode - * the quote mode - * @param commentStart - * the Character used for comment identification, may be {@code null} to disable - * @param escape - * the Character used to escape special characters in values, may be {@code null} to disable - * @param ignoreSurroundingSpaces - * {@code true} when whitespaces enclosing values should be ignored - * @param ignoreEmptyLines - * {@code true} when the parser should skip empty lines - * @param recordSeparator - * the line separator to use for output - * @param nullString - * the line separator to use for output - * @param headerComments - * the comments to be printed by the Printer before the actual CSV data - * @param header - * the header - * @param skipHeaderRecord - * TODO - * @param allowMissingColumnNames - * TODO - * @param ignoreHeaderCase - * TODO - * @param trim - * TODO - * @param trailingDelimiter - * TODO - * @param autoFlush - * @throws IllegalArgumentException - * if the delimiter is a line break character - */ - private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode, - final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces, - final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, - final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, - final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim, - final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) { - this.delimiter = delimiter; - this.quoteCharacter = quoteChar; - this.quoteMode = quoteMode; - this.commentMarker = commentStart; - this.escapeCharacter = escape; - this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; - this.allowMissingColumnNames = allowMissingColumnNames; - this.ignoreEmptyLines = ignoreEmptyLines; - this.recordSeparator = recordSeparator; - this.nullString = nullString; - this.headerComments = toStringArray(headerComments); - this.header = header == null ? null : header.clone(); - this.skipHeaderRecord = skipHeaderRecord; - this.ignoreHeaderCase = ignoreHeaderCase; - this.trailingDelimiter = trailingDelimiter; - this.trim = trim; - this.autoFlush = autoFlush; - this.quotedNullString = quoteCharacter + nullString + quoteCharacter; - this.allowDuplicateHeaderNames = allowDuplicateHeaderNames; - validate(); - } - - @Override - public boolean equals(final Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - - final CSVFormat other = (CSVFormat) obj; - if (delimiter != other.delimiter) { - return false; - } - if (trailingDelimiter != other.trailingDelimiter) { - return false; - } - if (autoFlush != other.autoFlush) { - return false; - } - if (trim != other.trim) { - return false; - } - if (allowMissingColumnNames != other.allowMissingColumnNames) { - return false; - } - if (allowDuplicateHeaderNames != other.allowDuplicateHeaderNames) { - return false; - } - if (ignoreHeaderCase != other.ignoreHeaderCase) { - return false; - } - if (quoteMode != other.quoteMode) { - return false; - } - if (quoteCharacter == null) { - if (other.quoteCharacter != null) { - return false; - } - } else if (!quoteCharacter.equals(other.quoteCharacter)) { - return false; - } - if (commentMarker == null) { - if (other.commentMarker != null) { - return false; - } - } else if (!commentMarker.equals(other.commentMarker)) { - return false; - } - if (escapeCharacter == null) { - if (other.escapeCharacter != null) { - return false; - } - } else if (!escapeCharacter.equals(other.escapeCharacter)) { - return false; - } - if (nullString == null) { - if (other.nullString != null) { - return false; - } - } else if (!nullString.equals(other.nullString)) { - return false; - } - if (!Arrays.equals(header, other.header)) { - return false; - } - if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) { - return false; - } - if (ignoreEmptyLines != other.ignoreEmptyLines) { - return false; - } - if (skipHeaderRecord != other.skipHeaderRecord) { - return false; - } - if (recordSeparator == null) { - if (other.recordSeparator != null) { - return false; - } - } else if (!recordSeparator.equals(other.recordSeparator)) { - return false; - } - if (!Arrays.equals(headerComments, other.headerComments)) { - return false; - } - return true; - } - - /** - * Formats the specified values. - * - * @param values - * the values to format - * @return the formatted values - */ - public String format(final Object... values) { - final StringWriter out = new StringWriter(); - try (CSVPrinter csvPrinter = new CSVPrinter(out, this)) { - csvPrinter.printRecord(values); - String res = out.toString(); - int len = recordSeparator != null ? res.length() - recordSeparator.length() : res.length(); - return res.substring(0, len); - } catch (final IOException e) { - // should not happen because a StringWriter does not do IO. - throw new IllegalStateException(e); - } - } - - /** - * Returns true if and only if duplicate names are allowed in the headers. - * - * @return whether duplicate header names are allowed - * @since 1.7 - */ - public boolean getAllowDuplicateHeaderNames() { - return allowDuplicateHeaderNames; - } - - /** - * Specifies whether missing column names are allowed when parsing the header line. - * - * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an - * {@link IllegalArgumentException}. - */ - public boolean getAllowMissingColumnNames() { - return allowMissingColumnNames; - } - - /** - * Returns whether to flush on close. - * - * @return whether to flush on close. - * @since 1.6 - */ - public boolean getAutoFlush() { - return autoFlush; - } - - /** - * Returns the character marking the start of a line comment. - * - * @return the comment start marker, may be {@code null} - */ - public Character getCommentMarker() { - return commentMarker; - } - - /** - * Returns the character delimiting the values (typically ';', ',' or '\t'). - * - * @return the delimiter character - */ - public char getDelimiter() { - return delimiter; - } - - /** - * Returns the escape character. - * - * @return the escape character, may be {@code null} - */ - public Character getEscapeCharacter() { - return escapeCharacter; - } - - /** - * Returns a copy of the header array. - * - * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file - */ - public String[] getHeader() { - return header != null ? header.clone() : null; - } - - /** - * Returns a copy of the header comment array. - * - * @return a copy of the header comment array; {@code null} if disabled. - */ - public String[] getHeaderComments() { - return headerComments != null ? headerComments.clone() : null; - } - - /** - * Specifies whether empty lines between records are ignored when parsing input. - * - * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty - * records. - */ - public boolean getIgnoreEmptyLines() { - return ignoreEmptyLines; - } - - /** - * Specifies whether header names will be accessed ignoring case. - * - * @return {@code true} if header names cases are ignored, {@code false} if they are case sensitive. - * @since 1.3 - */ - public boolean getIgnoreHeaderCase() { - return ignoreHeaderCase; - } - - /** - * Specifies whether spaces around values are ignored when parsing input. - * - * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value. - */ - public boolean getIgnoreSurroundingSpaces() { - return ignoreSurroundingSpaces; - } - - /** - * Gets the String to convert to and from {@code null}. - *- * See also the various static parse methods on {@link CSVParser}. - *
- * - * @param in - * the input stream - * @return a parser over a stream of {@link CSVRecord}s. - * @throws IOException - * If an I/O error occurs - */ - public CSVParser parse(final Reader in) throws IOException { - return new CSVParser(in, this); - } - - /** - * Prints to the specified output. - * - *- * See also {@link CSVPrinter}. - *
- * - * @param out - * the output. - * @return a printer to an output. - * @throws IOException - * thrown if the optional header cannot be printed. - */ - public CSVPrinter print(final Appendable out) throws IOException { - return new CSVPrinter(out, this); - } - - /** - * Prints to the specified output. - * - *- * See also {@link CSVPrinter}. - *
- * - * @param out - * the output. - * @param charset - * A charset. - * @return a printer to an output. - * @throws IOException - * thrown if the optional header cannot be printed. - * @since 1.5 - */ - @SuppressWarnings("resource") - public CSVPrinter print(final File out, final Charset charset) throws IOException { - // The writer will be closed when close() is called. - return new CSVPrinter(new OutputStreamWriter(new FileOutputStream(out), charset), this); - } - - /** - * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated - * as needed. Useful when one wants to avoid creating CSVPrinters. - * Trims the value if {@link #getTrim()} is true - * @param value - * value to output. - * @param out - * where to print the value. - * @param newRecord - * if this a new record. - * @throws IOException - * If an I/O error occurs. - * @since 1.4 - */ - public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { - // null values are considered empty - // Only call CharSequence.toString() if you have to, helps GC-free use cases. - CharSequence charSequence; - if (value == null) { - // https://issues.apache.org/jira/browse/CSV-203 - if (null == nullString) { - charSequence = EMPTY; - } else { - if (QuoteMode.ALL == quoteMode) { - charSequence = quotedNullString; - } else { - charSequence = nullString; - } - } - } else { - if (value instanceof CharSequence) { - charSequence = (CharSequence) value; - } else if (value instanceof Reader) { - print((Reader) value, out, newRecord); - return; - } else { - charSequence = value.toString(); - } - } - charSequence = getTrim() ? trim(charSequence) : charSequence; - print(value, charSequence, out, newRecord); - } - - private void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) - throws IOException { - final int offset = 0; - final int len = value.length(); - if (!newRecord) { - out.append(getDelimiter()); - } - if (object == null) { - out.append(value); - } else if (isQuoteCharacterSet()) { - // the original object is needed so can check for Number - printWithQuotes(object, value, out, newRecord); - } else if (isEscapeCharacterSet()) { - printWithEscapes(value, out); - } else { - out.append(value, offset, len); - } - } - - /** - * Prints to the specified output, returns a {@code CSVPrinter} which the caller MUST close. - * - *- * See also {@link CSVPrinter}. - *
- * - * @param out the output. - * @param charset A charset. - * @return a printer to an output. - * @throws IOException thrown if the optional header cannot be printed. - * @since 1.5 - */ - @SuppressWarnings("resource") - public CSVPrinter print(final Path out, final Charset charset) throws IOException { - return print(Files.newBufferedWriter(out, charset)); - } - - private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException { - // Reader is never null - if (!newRecord) { - out.append(getDelimiter()); - } - if (isQuoteCharacterSet()) { - printWithQuotes(reader, out); - } else if (isEscapeCharacterSet()) { - printWithEscapes(reader, out); - } else if (out instanceof Writer) { - IOUtils.copyLarge(reader, (Writer) out); - } else { - IOUtils.copy(reader, out); - } - - } - - /** - * Prints to the {@link System#out}. - * - *- * See also {@link CSVPrinter}. - *
- * - * @return a printer to {@link System#out}. - * @throws IOException - * thrown if the optional header cannot be printed. - * @since 1.5 - */ - public CSVPrinter printer() throws IOException { - return new CSVPrinter(System.out, this); - } - - /** - * Outputs the trailing delimiter (if set) followed by the record separator (if set). - * - * @param out - * where to write - * @throws IOException - * If an I/O error occurs - * @since 1.4 - */ - public void println(final Appendable out) throws IOException { - if (getTrailingDelimiter()) { - out.append(getDelimiter()); - } - if (recordSeparator != null) { - out.append(recordSeparator); - } - } - - /** - * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the - * record separator. - * - *- * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}. - *
- * - * @param out - * where to write. - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs. - * @since 1.4 - */ - public void printRecord(final Appendable out, final Object... values) throws IOException { - for (int i = 0; i < values.length; i++) { - print(values[i], out, i == 0); - } - println(out); - } - - /* - * Note: must only be called if escaping is enabled, otherwise will generate NPE - */ - private void printWithEscapes(final CharSequence value, final Appendable out) throws IOException { - int start = 0; - int pos = 0; - final int len = value.length(); - final int end = len; - - final char delim = getDelimiter(); - final char escape = getEscapeCharacter().charValue(); - - while (pos < end) { - char c = value.charAt(pos); - if (c == CR || c == LF || c == delim || c == escape) { - // write out segment up until this char - if (pos > start) { - out.append(value, start, pos); - } - if (c == LF) { - c = 'n'; - } else if (c == CR) { - c = 'r'; - } - - out.append(escape); - out.append(c); - - start = pos + 1; // start on the current char after this one - } - pos++; - } - - // write last segment - if (pos > start) { - out.append(value, start, pos); - } - } - - private void printWithEscapes(final Reader reader, final Appendable out) throws IOException { - int start = 0; - int pos = 0; - - final char delim = getDelimiter(); - final char escape = getEscapeCharacter().charValue(); - final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); - - int c; - while (-1 != (c = reader.read())) { - builder.append((char) c); - if (c == CR || c == LF || c == delim || c == escape) { - // write out segment up until this char - if (pos > start) { - out.append(builder.substring(start, pos)); - builder.setLength(0); - pos = -1; - } - if (c == LF) { - c = 'n'; - } else if (c == CR) { - c = 'r'; - } - - out.append(escape); - out.append((char) c); - - start = pos + 1; // start on the current char after this one - } - pos++; - } - - // write last segment - if (pos > start) { - out.append(builder.substring(start, pos)); - } - } - - /* - * Note: must only be called if quoting is enabled, otherwise will generate NPE - */ - // the original object is needed so can check for Number - private void printWithQuotes(final Object object, final CharSequence value, final Appendable out, - final boolean newRecord) throws IOException { - boolean quote = false; - int start = 0; - int pos = 0; - final int len = value.length(); - final int end = len; - - final char delimChar = getDelimiter(); - final char quoteChar = getQuoteCharacter().charValue(); - // If escape char not specified, default to the quote char - // This avoids having to keep checking whether there is an escape character - // at the cost of checking against quote twice - final char escapeChar = isEscapeCharacterSet() ? getEscapeCharacter().charValue() : quoteChar; - - QuoteMode quoteModePolicy = getQuoteMode(); - if (quoteModePolicy == null) { - quoteModePolicy = QuoteMode.MINIMAL; - } - switch (quoteModePolicy) { - case ALL: - case ALL_NON_NULL: - quote = true; - break; - case NON_NUMERIC: - quote = !(object instanceof Number); - break; - case NONE: - // Use the existing escaping code - printWithEscapes(value, out); - return; - case MINIMAL: - if (len <= 0) { - // always quote an empty token that is the first - // on the line, as it may be the only thing on the - // line. If it were not quoted in that case, - // an empty line has no tokens. - if (newRecord) { - quote = true; - } - } else { - char c = value.charAt(pos); - - if (c <= COMMENT) { - // Some other chars at the start of a value caused the parser to fail, so for now - // encapsulate if we start in anything less than '#'. We are being conservative - // by including the default comment char too. - quote = true; - } else { - while (pos < end) { - c = value.charAt(pos); - if (c == LF || c == CR || c == quoteChar || c == delimChar || c == escapeChar) { - quote = true; - break; - } - pos++; - } - - if (!quote) { - pos = end - 1; - c = value.charAt(pos); - // Some other chars at the end caused the parser to fail, so for now - // encapsulate if we end in anything less than ' ' - if (c <= SP) { - quote = true; - } - } - } - } - - if (!quote) { - // no encapsulation needed - write out the original value - out.append(value, start, end); - return; - } - break; - default: - throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); - } - - if (!quote) { - // no encapsulation needed - write out the original value - out.append(value, start, end); - return; - } - - // we hit something that needed encapsulation - out.append(quoteChar); - - // Pick up where we left off: pos should be positioned on the first character that caused - // the need for encapsulation. - while (pos < end) { - final char c = value.charAt(pos); - if (c == quoteChar || c == escapeChar) { - // write out the chunk up until this point - out.append(value, start, pos); - out.append(escapeChar); // now output the escape - start = pos; // and restart with the matched char - } - pos++; - } - - // write the last segment - out.append(value, start, pos); - out.append(quoteChar); - } - - /** - * Always use quotes unless QuoteMode is NONE, so we not have to look ahead. - * - * @throws IOException - */ - private void printWithQuotes(final Reader reader, final Appendable out) throws IOException { - - if (getQuoteMode() == QuoteMode.NONE) { - printWithEscapes(reader, out); - return; - } - - int pos = 0; - - final char quote = getQuoteCharacter().charValue(); - final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); - - out.append(quote); - - int c; - while (-1 != (c = reader.read())) { - builder.append((char) c); - if (c == quote) { - // write out segment up until this char - if (pos > 0) { - out.append(builder.substring(0, pos)); - builder.setLength(0); - pos = -1; - } - - out.append(quote); - out.append((char) c); - } - pos++; - } - - // write last segment - if (pos > 0) { - out.append(builder.substring(0, pos)); - } - - out.append(quote); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("Delimiter=<").append(delimiter).append('>'); - if (isEscapeCharacterSet()) { - sb.append(' '); - sb.append("Escape=<").append(escapeCharacter).append('>'); - } - if (isQuoteCharacterSet()) { - sb.append(' '); - sb.append("QuoteChar=<").append(quoteCharacter).append('>'); - } - if (quoteMode != null) { - sb.append(' '); - sb.append("QuoteMode=<").append(quoteMode).append('>'); - } - if (isCommentMarkerSet()) { - sb.append(' '); - sb.append("CommentStart=<").append(commentMarker).append('>'); - } - if (isNullStringSet()) { - sb.append(' '); - sb.append("NullString=<").append(nullString).append('>'); - } - if (recordSeparator != null) { - sb.append(' '); - sb.append("RecordSeparator=<").append(recordSeparator).append('>'); - } - if (getIgnoreEmptyLines()) { - sb.append(" EmptyLines:ignored"); - } - if (getIgnoreSurroundingSpaces()) { - sb.append(" SurroundingSpaces:ignored"); - } - if (getIgnoreHeaderCase()) { - sb.append(" IgnoreHeaderCase:ignored"); - } - sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); - if (headerComments != null) { - sb.append(' '); - sb.append("HeaderComments:").append(Arrays.toString(headerComments)); - } - if (header != null) { - sb.append(' '); - sb.append("Header:").append(Arrays.toString(header)); - } - return sb.toString(); - } - - private String[] toStringArray(final Object[] values) { - if (values == null) { - return null; - } - final String[] strings = new String[values.length]; - for (int i = 0; i < values.length; i++) { - final Object value = values[i]; - strings[i] = value == null ? null : value.toString(); - } - return strings; - } - - private CharSequence trim(final CharSequence charSequence) { - if (charSequence instanceof String) { - return ((String) charSequence).trim(); - } - final int count = charSequence.length(); - int len = count; - int pos = 0; - - while (pos < len && charSequence.charAt(pos) <= SP) { - pos++; - } - while (pos < len && charSequence.charAt(len - 1) <= SP) { - len--; - } - return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; - } - - /** - * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary. - * - * @throws IllegalArgumentException - */ - private void validate() throws IllegalArgumentException { - if (isLineBreak(delimiter)) { - throw new IllegalArgumentException("The delimiter cannot be a line break"); - } - - if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) { - throw new IllegalArgumentException( - "The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); - } - - if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) { - throw new IllegalArgumentException( - "The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); - } - - if (commentMarker != null && delimiter == commentMarker.charValue()) { - throw new IllegalArgumentException( - "The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); - } - - if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { - throw new IllegalArgumentException( - "The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')"); - } - - if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { - throw new IllegalArgumentException( - "The comment start and the escape character cannot be the same ('" + commentMarker + "')"); - } - - if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { - throw new IllegalArgumentException("No quotes mode set but no escape character is set"); - } - - // validate header - if (header != null && !allowDuplicateHeaderNames) { - final Set- * Calling this method is equivalent to calling: - *
- * - *- * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord(); - *- * - * @return A new CSVFormat that is equal to this but using the first record as header. - * @see #withSkipHeaderRecord(boolean) - * @see #withHeader(String...) - * @since 1.3 - */ - public CSVFormat withFirstRecordAsHeader() { - return withHeader().withSkipHeaderRecord(); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format defined by the enum class. - * - *
- * Example: - *
- * - *- * public enum Header { - * Name, Email, Phone - * } - * - * CSVFormat format = aformat.withHeader(Header.class); - *- *
- * The header is also used by the {@link CSVPrinter}. - *
- * - * @param headerEnum - * the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified - * otherwise. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @see #withHeader(String...) - * @see #withSkipHeaderRecord(boolean) - * @since 1.3 - */ - public CSVFormat withHeader(final Class extends Enum>> headerEnum) { - String[] header = null; - if (headerEnum != null) { - final Enum>[] enumValues = headerEnum.getEnumConstants(); - header = new String[enumValues.length]; - for (int i = 0; i < enumValues.length; i++) { - header[i] = enumValues[i].name(); - } - } - return withHeader(header); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can - * either be parsed automatically from the input file with: - * - *- * CSVFormat format = aformat.withHeader(); - *- * - * or specified manually with: - * - *
- * CSVFormat format = aformat.withHeader(resultSet); - *- *
- * The header is also used by the {@link CSVPrinter}. - *
- * - * @param resultSet - * the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified - * otherwise. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @throws SQLException - * SQLException if a database access error occurs or this method is called on a closed result set. - * @since 1.1 - */ - public CSVFormat withHeader(final ResultSet resultSet) throws SQLException { - return withHeader(resultSet != null ? resultSet.getMetaData() : null); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can - * either be parsed automatically from the input file with: - * - *- * CSVFormat format = aformat.withHeader(); - *- * - * or specified manually with: - * - *
- * CSVFormat format = aformat.withHeader(metaData); - *- *
- * The header is also used by the {@link CSVPrinter}. - *
- * - * @param metaData - * the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified - * otherwise. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @throws SQLException - * SQLException if a database access error occurs or this method is called on a closed result set. - * @since 1.1 - */ - public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLException { - String[] labels = null; - if (metaData != null) { - final int columnCount = metaData.getColumnCount(); - labels = new String[columnCount]; - for (int i = 0; i < columnCount; i++) { - labels[i] = metaData.getColumnLabel(i + 1); - } - } - return withHeader(labels); - } - - /** - * Returns a new {@code CSVFormat} with the header of the format set to the given values. The header can either be - * parsed automatically from the input file with: - * - *- * CSVFormat format = aformat.withHeader(); - *- * - * or specified manually with: - * - *
- * CSVFormat format = aformat.withHeader("name", "email", "phone"); - *- *
- * The header is also used by the {@link CSVPrinter}. - *
- * - * @param header - * the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @see #withSkipHeaderRecord(boolean) - */ - public CSVFormat withHeader(final String... header) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will - * be printed first, before the headers. This setting is ignored by the parser. - * - *- * CSVFormat format = aformat.withHeaderComments("Generated by Apache Commons CSV 1.1.", new Date()); - *- * - * @param headerComments - * the headerComments which will be printed by the Printer before the actual CSV data. - * - * @return A new CSVFormat that is equal to this but with the specified header - * @see #withSkipHeaderRecord(boolean) - * @since 1.1 - */ - public CSVFormat withHeaderComments(final Object... headerComments) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}. - * - * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. - * @since {@link #withIgnoreEmptyLines(boolean)} - * @since 1.1 - */ - public CSVFormat withIgnoreEmptyLines() { - return this.withIgnoreEmptyLines(true); - } - - /** - * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value. - * - * @param ignoreEmptyLines - * the empty line skipping behavior, {@code true} to ignore the empty lines between the records, - * {@code false} to translate empty lines to empty records. - * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. - */ - public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. - * - * @return A new CSVFormat that will ignore case header name. - * @see #withIgnoreHeaderCase(boolean) - * @since 1.3 - */ - public CSVFormat withIgnoreHeaderCase() { - return this.withIgnoreHeaderCase(true); - } - - /** - * Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case. - * - * @param ignoreHeaderCase - * the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as - * is. - * @return A new CSVFormat that will ignore case header name if specified as {@code true} - * @since 1.3 - */ - public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}. - * - * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior. - * @see #withIgnoreSurroundingSpaces(boolean) - * @since 1.1 - */ - public CSVFormat withIgnoreSurroundingSpaces() { - return this.withIgnoreSurroundingSpaces(true); - } - - /** - * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value. - * - * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, - * {@code false} to leave the spaces as is. - * @return A new CSVFormat that is equal to this but with the specified trimming behavior. - */ - public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output. - *
- * Note: This setting is only used during printing and does not affect parsing. Parsing currently - * only works for inputs with '\n', '\r' and "\r\n" - *
- * - * @param recordSeparator - * the record separator to use for output. - * - * @return A new CSVFormat that is equal to this but with the specified output record separator - */ - public CSVFormat withRecordSeparator(final char recordSeparator) { - return withRecordSeparator(String.valueOf(recordSeparator)); - } - - /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String. - * - *- * Note: This setting is only used during printing and does not affect parsing. Parsing currently - * only works for inputs with '\n', '\r' and "\r\n" - *
- * - * @param recordSeparator - * the record separator to use for output. - * - * @return A new CSVFormat that is equal to this but with the specified output record separator - * @throws IllegalArgumentException - * if recordSeparator is none of CR, LF or CRLF - */ - public CSVFormat withRecordSeparator(final String recordSeparator) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with skipping the header record set to {@code true}. - * - * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. - * @see #withSkipHeaderRecord(boolean) - * @see #withHeader(String...) - * @since 1.1 - */ - public CSVFormat withSkipHeaderRecord() { - return this.withSkipHeaderRecord(true); - } - - /** - * Returns a new {@code CSVFormat} with whether to skip the header record. - * - * @param skipHeaderRecord - * whether to skip the header record. - * - * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. - * @see #withHeader(String...) - */ - public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} with the record separator of the format set to the operating system's line - * separator string, typically CR+LF on Windows and LF on Linux. - * - *- * Note: This setting is only used during printing and does not affect parsing. Parsing currently - * only works for inputs with '\n', '\r' and "\r\n" - *
- * - * @return A new CSVFormat that is equal to this but with the operating system's line separator string. - * @since 1.6 - */ - public CSVFormat withSystemRecordSeparator() { - return withRecordSeparator(System.getProperty("line.separator")); - } - - /** - * Returns a new {@code CSVFormat} to add a trailing delimiter. - * - * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. - * @since 1.3 - */ - public CSVFormat withTrailingDelimiter() { - return withTrailingDelimiter(true); - } - - /** - * Returns a new {@code CSVFormat} with whether to add a trailing delimiter. - * - * @param trailingDelimiter - * whether to add a trailing delimiter. - * - * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. - * @since 1.3 - */ - public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } - - /** - * Returns a new {@code CSVFormat} to trim leading and trailing blanks. - * See {@link #getTrim()} for details of where this is used. - * - * @return A new CSVFormat that is equal to this but with the trim setting on. - * @since 1.3 - */ - public CSVFormat withTrim() { - return withTrim(true); - } - - /** - * Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks. - * See {@link #getTrim()} for details of where this is used. - * - * @param trim - * whether to trim leading and trailing blanks. - * - * @return A new CSVFormat that is equal to this but with the specified trim setting. - * @since 1.3 - */ - public CSVFormat withTrim(final boolean trim) { - return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, - ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, - allowDuplicateHeaderNames); - } -} diff --git a/src/test/resources/org/apache/commons/csv/CSVParser.java b/src/test/resources/org/apache/commons/csv/CSVParser.java deleted file mode 100644 index bf6eb6d6..00000000 --- a/src/test/resources/org/apache/commons/csv/CSVParser.java +++ /dev/null @@ -1,715 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Token.Type.TOKEN; - -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.io.StringReader; -import java.net.URL; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Objects; -import java.util.TreeMap; - -/** - * Parses CSV files according to the specified format. - * - * Because CSV appears in many different dialects, the parser supports many formats by allowing the - * specification of a {@link CSVFormat}. - * - * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream. - * - *- * There are several static factory methods that can be used to create instances for various types of resources: - *
- *- * Alternatively parsers can also be created by passing a {@link Reader} directly to the sole constructor. - * - * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut: - *
- *- * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) { - * ... - * } - *- * - *
- * To parse a CSV input from a file, you write: - *
- * - *- * File csvData = new File("/path/to/csv"); - * CSVParser parser = CSVParser.parse(csvData, CSVFormat.RFC4180); - * for (CSVRecord csvRecord : parser) { - * ... - * } - *- * - *
- * This will read the parse the contents of the file using the - * RFC 4180 format. - *
- * - *- * To parse CSV input in a format like Excel, you write: - *
- * - *- * CSVParser parser = CSVParser.parse(csvData, CSVFormat.EXCEL); - * for (CSVRecord csvRecord : parser) { - * ... - * } - *- * - *
- * If the predefined formats don't match the format at hands, custom formats can be defined. More information about - * customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}. - *
- * - *- * If parsing record wise is not desired, the contents of the input can be read completely into memory. - *
- * - *- * Reader in = new StringReader("a;b\nc;d"); - * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL); - * List<CSVRecord> list = parser.getRecords(); - *- * - *
- * There are two constraints that have to be kept in mind: - *
- * - *- * Internal parser state is completely covered by the format and the reader-state. - *
- * - * @see package documentation for more details - */ -public final class CSVParser implements Iterable- * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, - * unless you close the {@code reader}. - *
- * - * @param inputStream - * an InputStream containing CSV-formatted input. Must not be null. - * @param charset - * The Charset to decode the given file. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new CSVParser configured with the given reader and format. - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. - * @throws IOException - * If there is a problem reading the header or skipping the first record - * @since 1.5 - */ - @SuppressWarnings("resource") - public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format) - throws IOException { - Objects.requireNonNull(inputStream, "inputStream"); - Objects.requireNonNull(format, "format"); - return parse(new InputStreamReader(inputStream, charset), format); - } - - /** - * Creates and returns a parser for the given {@link Path}, which the caller MUST close. - * - * @param path - * a CSV file. Must not be null. - * @param charset - * The Charset to decode the given file. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new parser - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either file or format are null. - * @throws IOException - * If an I/O error occurs - * @since 1.5 - */ - @SuppressWarnings("resource") - public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException { - Objects.requireNonNull(path, "path"); - Objects.requireNonNull(format, "format"); - return parse(Files.newInputStream(path), charset, format); - } - - /** - * Creates a CSV parser using the given {@link CSVFormat} - * - *- * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, - * unless you close the {@code reader}. - *
- * - * @param reader - * a Reader containing CSV-formatted input. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new CSVParser configured with the given reader and format. - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. - * @throws IOException - * If there is a problem reading the header or skipping the first record - * @since 1.5 - */ - public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException { - return new CSVParser(reader, format); - } - - // the following objects are shared to reduce garbage - - /** - * Creates a parser for the given {@link String}. - * - * @param string - * a CSV string. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new parser - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either string or format are null. - * @throws IOException - * If an I/O error occurs - */ - public static CSVParser parse(final String string, final CSVFormat format) throws IOException { - Objects.requireNonNull(string, "string"); - Objects.requireNonNull(format, "format"); - - return new CSVParser(new StringReader(string), format); - } - - /** - * Creates and returns a parser for the given URL, which the caller MUST close. - * - *- * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless - * you close the {@code url}. - *
- * - * @param url - * a URL. Must not be null. - * @param charset - * the charset for the resource. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @return a new parser - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either url, charset or format are null. - * @throws IOException - * If an I/O error occurs - */ - @SuppressWarnings("resource") - public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException { - Objects.requireNonNull(url, "url"); - Objects.requireNonNull(charset, "charset"); - Objects.requireNonNull(format, "format"); - - return new CSVParser(new InputStreamReader(url.openStream(), charset), format); - } - - private final CSVFormat format; - - /** A mapping of column names to column indices */ - private final Map- * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, - * unless you close the {@code reader}. - *
- * - * @param reader - * a Reader containing CSV-formatted input. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. - * @throws IOException - * If there is a problem reading the header or skipping the first record - */ - public CSVParser(final Reader reader, final CSVFormat format) throws IOException { - this(reader, format, 0, 1); - } - - /** - * Customized CSV parser using the given {@link CSVFormat} - * - *- * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser, - * unless you close the {@code reader}. - *
- * - * @param reader - * a Reader containing CSV-formatted input. Must not be null. - * @param format - * the CSVFormat used for CSV parsing. Must not be null. - * @param characterOffset - * Lexer offset when the parser does not start parsing at the beginning of the source. - * @param recordNumber - * The next record number to assign - * @throws IllegalArgumentException - * If the parameters of the format are inconsistent or if either reader or format are null. - * @throws IOException - * If there is a problem reading the header or skipping the first record - * @since 1.1 - */ - @SuppressWarnings("resource") - public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber) - throws IOException { - Objects.requireNonNull(reader, "reader"); - Objects.requireNonNull(format, "format"); - - this.format = format; - this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); - this.csvRecordIterator = new CSVRecordIterator(); - final Headers headers = createHeaders(); - this.headerMap = headers.headerMap; - this.headerNames = headers.headerNames; - this.characterOffset = characterOffset; - this.recordNumber = recordNumber - 1; - } - - private void addRecordValue(final boolean lastRecord) { - final String input = this.reusableToken.content.toString(); - final String inputClean = this.format.getTrim() ? input.trim() : input; - if (lastRecord && inputClean.isEmpty() && this.format.getTrailingDelimiter()) { - return; - } - final String nullString = this.format.getNullString(); - this.recordList.add(inputClean.equals(nullString) ? null : inputClean); - } - - /** - * Closes resources. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void close() throws IOException { - if (this.lexer != null) { - this.lexer.close(); - } - } - - private Map- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to - * the record number. - *
- * - * @return current line number - */ - public long getCurrentLineNumber() { - return this.lexer.getCurrentLineNumber(); - } - - /** - * Gets the first end-of-line string encountered. - * - * @return the first end-of-line string - * @since 1.5 - */ - public String getFirstEndOfLine() { - return lexer.getFirstEol(); - } - - /** - * Returns a copy of the header map. - *- * The map keys are column names. The map values are 0-based indices. - *
- *- * Note: The map can only provide a one-to-one mapping when the format did not - * contain null or duplicate column names. - *
- * - * @return a copy of the header map. - */ - public Map- * Note: The list provides strings that can be used as keys in the header map. - * The list will not contain null column names if they were present in the input - * format. - *
- * - * @return read-only list of header names that iterates in column order. - * @see #getHeaderMap() - * @since 1.7 - */ - public List- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to - * the line number. - *
- * - * @return current record number - */ - public long getRecordNumber() { - return this.recordNumber; - } - - /** - * Parses the CSV input according to the given format and returns the content as a list of - * {@link CSVRecord CSVRecords}. - * - *- * The returned content starts at the current parse-position in the stream. - *
- * - * @return list of {@link CSVRecord CSVRecords}, may be empty - * @throws IOException - * on parse error or input read-failure - */ - public List- * An {@link IOException} caught during the iteration are re-thrown as an - * {@link IllegalStateException}. - *
- *- * If the parser is closed a call to {@link Iterator#next()} will throw a - * {@link NoSuchElementException}. - *
- */ - @Override - public IteratorValues can be appended to the output by calling the {@link #print(Object)} method. - * Values are printed according to {@link String#valueOf(Object)}. - * To complete a record the {@link #println()} method has to be called. - * Comments can be appended by calling {@link #printComment(String)}. - * However a comment will only be written to the output if the {@link CSVFormat} supports comments. - *
- * - *The printer also supports appending a complete record at once by calling {@link #printRecord(Object...)} - * or {@link #printRecord(Iterable)}. - * Furthermore {@link #printRecords(Object...)}, {@link #printRecords(Iterable)} and {@link #printRecords(ResultSet)} - * methods can be used to print several records at once. - *
- * - *Example:
- * - *- * try (CSVPrinter printer = new CSVPrinter(new FileWriter("csv.txt"), CSVFormat.EXCEL)) { - * printer.printRecord("id", "userName", "firstName", "lastName", "birthday"); - * printer.printRecord(1, "john73", "John", "Doe", LocalDate.of(1973, 9, 15)); - * printer.println(); - * printer.printRecord(2, "mary", "Mary", "Meyer", LocalDate.of(1985, 3, 29)); - * } catch (IOException ex) { - * ex.printStackTrace(); - * } - *- * - *
This code will write the following to csv.txt:
- *- * id,userName,firstName,lastName,birthday - * 1,john73,John,Doe,1973-09-15 - * - * 2,mary,Mary,Meyer,1985-03-29 - *- */ -public final class CSVPrinter implements Flushable, Closeable { - - /** The place that the values get written. */ - private final Appendable out; - private final CSVFormat format; - - /** True if we just began a new record. */ - private boolean newRecord = true; - - /** - * Creates a printer that will print values to the given stream following the CSVFormat. - *
- * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation - * and escaping with a different character) are not supported. - *
- * - * @param out - * stream to which to print. Must not be null. - * @param format - * the CSV format. Must not be null. - * @throws IOException - * thrown if the optional header cannot be printed. - * @throws IllegalArgumentException - * thrown if the parameters of the format are inconsistent or if either out or format are null. - */ - public CSVPrinter(final Appendable out, final CSVFormat format) throws IOException { - Objects.requireNonNull(out, "out"); - Objects.requireNonNull(format, "format"); - - this.out = out; - this.format = format; - // TODO: Is it a good idea to do this here instead of on the first call to a print method? - // It seems a pain to have to track whether the header has already been printed or not. - if (format.getHeaderComments() != null) { - for (final String line : format.getHeaderComments()) { - if (line != null) { - this.printComment(line); - } - } - } - if (format.getHeader() != null && !format.getSkipHeaderRecord()) { - this.printRecord((Object[]) format.getHeader()); - } - } - - // ====================================================== - // printing implementation - // ====================================================== - - @Override - public void close() throws IOException { - close(false); - } - - /** - * Closes the underlying stream with an optional flush first. - * @param flush whether to flush before the actual close. - * - * @throws IOException - * If an I/O error occurs - * @since 1.6 - */ - public void close(final boolean flush) throws IOException { - if (flush || format.getAutoFlush()) { - flush(); - } - if (out instanceof Closeable) { - ((Closeable) out).close(); - } - } - - /** - * Flushes the underlying stream. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void flush() throws IOException { - if (out instanceof Flushable) { - ((Flushable) out).flush(); - } - } - - /** - * Gets the target Appendable. - * - * @return the target Appendable. - */ - public Appendable getOut() { - return this.out; - } - - /** - * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed. - * - * @param value - * value to be output. - * @throws IOException - * If an I/O error occurs - */ - public void print(final Object value) throws IOException { - format.print(value, out, newRecord); - newRecord = false; - } - - /** - * Prints a comment on a new line among the delimiter separated values. - * - *- * Comments will always begin on a new line and occupy at least one full line. The character specified to start - * comments and a space will be inserted at the beginning of each new line in the comment. - *
- * - *- * If comments are disabled in the current CSV format this method does nothing. - *
- * - *This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()} - * to start a new line of the comment. Note that this might produce unexpected results for formats that do not use - * line breaks as record separator.
- * - * @param comment - * the comment to output - * @throws IOException - * If an I/O error occurs - */ - public void printComment(final String comment) throws IOException { - if (!format.isCommentMarkerSet()) { - return; - } - if (!newRecord) { - println(); - } - out.append(format.getCommentMarker().charValue()); - out.append(SP); - final int commentLength = comment.length(); - for (int i = 0; i < commentLength; i++) { - final char c = comment.charAt(i); - switch (c) { - case CR: - if (i + 1 < commentLength && comment.charAt(i + 1) == LF) { - i++; - } - //$FALL-THROUGH$ break intentionally excluded. - case LF: - println(); - out.append(format.getCommentMarker().charValue()); - out.append(SP); - break; - default: - out.append(c); - break; - } - } - println(); - } - - /** - * Outputs the record separator. - * - * @throws IOException - * If an I/O error occurs - */ - public void println() throws IOException { - format.println(out); - newRecord = true; - } - - /** - * Prints the given values a single record of delimiter separated values followed by the record separator. - * - *- * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println()}. - *
- * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - */ - public void printRecord(final Iterable> values) throws IOException { - for (final Object value : values) { - print(value); - } - println(); - } - - /** - * Prints the given values a single record of delimiter separated values followed by the record separator. - * - *- * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record - * separator to the output after printing the record, so there is no need to call {@link #println()}. - *
- * - * @param values - * values to output. - * @throws IOException - * If an I/O error occurs - */ - public void printRecord(final Object... values) throws IOException { - format.printRecord(out, values); - newRecord = true; - } - - /** - * Prints all the objects in the given collection handling nested collections/arrays as records. - * - *- * If the given collection only contains simple objects, this method will print a single record like - * {@link #printRecord(Iterable)}. If the given collections contains nested collections/arrays those nested elements - * will each be printed as records using {@link #printRecord(Object...)}. - *
- * - *- * Given the following data structure: - *
- * - *
- *
- * List<String[]> data = ...
- * data.add(new String[]{ "A", "B", "C" });
- * data.add(new String[]{ "1", "2", "3" });
- * data.add(new String[]{ "A1", "B2", "C3" });
- *
- *
- *
- * - * Calling this method will print: - *
- * - *
- *
- * A, B, C
- * 1, 2, 3
- * A1, B2, C3
- *
- *
- *
- * @param values
- * the values to print.
- * @throws IOException
- * If an I/O error occurs
- */
- public void printRecords(final Iterable> values) throws IOException {
- for (final Object value : values) {
- if (value instanceof Object[]) {
- this.printRecord((Object[]) value);
- } else if (value instanceof Iterable) {
- this.printRecord((Iterable>) value);
- } else {
- this.printRecord(value);
- }
- }
- }
-
- /**
- * Prints all the objects in the given array handling nested collections/arrays as records.
- *
- * - * If the given array only contains simple objects, this method will print a single record like - * {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested - * elements will each be printed as records using {@link #printRecord(Object...)}. - *
- * - *- * Given the following data structure: - *
- * - *
- *
- * String[][] data = new String[3][]
- * data[0] = String[]{ "A", "B", "C" };
- * data[1] = new String[]{ "1", "2", "3" };
- * data[2] = new String[]{ "A1", "B2", "C3" };
- *
- *
- *
- * - * Calling this method will print: - *
- * - *
- *
- * A, B, C
- * 1, 2, 3
- * A1, B2, C3
- *
- *
- *
- * @param values
- * the values to print.
- * @throws IOException
- * If an I/O error occurs
- */
- public void printRecords(final Object... values) throws IOException {
- printRecords(Arrays.asList(values));
- }
-
- /**
- * Prints all the objects in the given JDBC result set.
- *
- * @param resultSet
- * result set the values to print.
- * @throws IOException
- * If an I/O error occurs
- * @throws SQLException
- * if a database access error occurs
- */
- public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
- final int columnCount = resultSet.getMetaData().getColumnCount();
- while (resultSet.next()) {
- for (int i = 1; i <= columnCount; i++) {
- final Object object = resultSet.getObject(i);
- // TODO Who manages the Clob? The JDBC driver or must we close it? Is it driver-dependent?
- print(object instanceof Clob ? ((Clob) object).getCharacterStream() : object);
- }
- println();
- }
- }
-}
diff --git a/src/test/resources/org/apache/commons/csv/CSVRecord.java b/src/test/resources/org/apache/commons/csv/CSVRecord.java
deleted file mode 100644
index 81d1f2b6..00000000
--- a/src/test/resources/org/apache/commons/csv/CSVRecord.java
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.commons.csv;
-
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Objects;
-
-/**
- * A CSV record parsed from a CSV file.
- *
- * - * Note: Support for {@link Serializable} is scheduled to be removed in version 2.0. - * In version 1.8 the mapping between the column header and the column index was - * removed from the serialised state. The class maintains serialization compatibility - * with versions pre-1.8 for the record values; these must be accessed by index - * following deserialization. There will be loss of any functionally linked to the header - * mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa. - *
- */ -public final class CSVRecord implements Serializable, Iterable- * Note: This requires a field mapping obtained from the original parser. - * A check using {@link #isMapped(String)} should be used to determine if a - * mapping exists from the provided {@code name} to a field index. In this case an - * exception will only be thrown if the record does not contain a field corresponding - * to the mapping, that is the record length is not consistent with the mapping size. - *
- * - * @param name - * the name of the column to be retrieved. - * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}. - * @throws IllegalStateException - * if no header mapping was provided - * @throws IllegalArgumentException - * if {@code name} is not mapped or if the record is inconsistent - * @see #isMapped(String) - * @see #isConsistent() - * @see #getParser() - * @see CSVFormat#withNullString(String) - */ - public String get(final String name) { - final Map- * Note: The parser is not part of the serialized state of the record. A null check - * should be used when the record may have originated from a serialized form. - *
- * - * @return the parser. - * @since 1.7 - */ - public CSVParser getParser() { - return parser; - } - - /** - * Returns the number of this record in the parsed CSV file. - * - *- * ATTENTION: If your CSV input has multi-line values, the returned number does not correspond to - * the current line number of the parser that created this record. - *
- * - * @return the number of this record. - * @see CSVParser#getCurrentLineNumber() - */ - public long getRecordNumber() { - return recordNumber; - } - - /** - * Checks whether this record has a comment, false otherwise. - * Note that comments are attached to the following record. - * If there is no following record (i.e. the comment is at EOF) - * the comment will be ignored. - * - * @return true if this record has a comment, false otherwise - * @since 1.3 - */ - public boolean hasComment() { - return comment != null; - } - - /** - * Tells whether the record size matches the header size. - * - *- * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this - * test but still produce parsable files. - *
- * - * @return true of this record is valid, false if not - */ - public boolean isConsistent() { - final Map- * In particular the reader supports a look-ahead option, which allows you to see the next char returned by - * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}. - *
- */ -final class ExtendedBufferedReader extends BufferedReader { - - /** The last char returned */ - private int lastChar = UNDEFINED; - - /** The count of EOLs (CR/LF/CRLF) seen so far */ - private long eolCounter; - - /** The position, which is number of characters read so far */ - private long position; - - private boolean closed; - - /** - * Created extended buffered reader using default buffer-size - */ - ExtendedBufferedReader(final Reader reader) { - super(reader); - } - - /** - * Closes the stream. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void close() throws IOException { - // Set ivars before calling super close() in case close() throws an IOException. - closed = true; - lastChar = END_OF_STREAM; - super.close(); - } - - /** - * Returns the current line number - * - * @return the current line number - */ - long getCurrentLineNumber() { - // Check if we are at EOL or EOF or just starting - if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) { - return eolCounter; // counter is accurate - } - return eolCounter + 1; // Allow for counter being incremented only at EOL - } - - /** - * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by - * any of the read methods. This will not include a character read using the {@link #lookAhead()} method. If no - * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached - * on the last read then this will return {@link Constants#END_OF_STREAM}. - * - * @return the last character that was read - */ - int getLastChar() { - return lastChar; - } - - /** - * Gets the character position in the reader. - * - * @return the current position in the reader (counting characters, not bytes since this is a Reader) - */ - long getPosition() { - return this.position; - } - - public boolean isClosed() { - return closed; - } - - /** - * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will - * still return this value. Does not affect line number or last character. - * - * @return the next character - * - * @throws IOException - * if there is an error in reading - */ - int lookAhead() throws IOException { - super.mark(1); - final int c = super.read(); - super.reset(); - - return c; - } - - @Override - public int read() throws IOException { - final int current = super.read(); - if (current == CR || current == LF && lastChar != CR) { - eolCounter++; - } - lastChar = current; - this.position++; - return lastChar; - } - - @Override - public int read(final char[] buf, final int offset, final int length) throws IOException { - if (length == 0) { - return 0; - } - - final int len = super.read(buf, offset, length); - - if (len > 0) { - - for (int i = offset; i < offset + len; i++) { - final char ch = buf[i]; - if (ch == LF) { - if (CR != (i > 0 ? buf[i - 1] : lastChar)) { - eolCounter++; - } - } else if (ch == CR) { - eolCounter++; - } - } - - lastChar = buf[offset + len - 1]; - - } else if (len == -1) { - lastChar = END_OF_STREAM; - } - - position += len; - return len; - } - - /** - * Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called - * when processing a comment, otherwise information can be lost. - *- * Increments {@link #eolCounter} - *
- * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF - * - * @return the line that was read, or null if reached EOF. - */ - @Override - public String readLine() throws IOException { - final String line = super.readLine(); - - if (line != null) { - lastChar = LF; // needed for detecting start of line - eolCounter++; - } else { - lastChar = END_OF_STREAM; - } - - return line; - } - -} diff --git a/src/test/resources/org/apache/commons/csv/IOUtils.java b/src/test/resources/org/apache/commons/csv/IOUtils.java deleted file mode 100644 index 1771d4dc..00000000 --- a/src/test/resources/org/apache/commons/csv/IOUtils.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.csv; - -import java.io.IOException; -import java.io.Reader; -import java.io.Writer; -import java.nio.CharBuffer; - -/** Copied from Apache Commons IO. */ -class IOUtils { - - /** - *
- * Copied from Apache Commons IO. - *
- * The default buffer size ({@value}). - */ - static final int DEFAULT_BUFFER_SIZE = 1024 * 4; - - /** - *- * Copied from Apache Commons IO. - *
- * Represents the end-of-file (or stream). - * @since 2.5 (made public) - */ - private static final int EOF = -1; - - /** - * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}. - *- * This method buffers the input internally, so there is no need to use a - * {@code BufferedReader}. - *
- * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. - * - * @param input the {@code Reader} to read from - * @param output the {@code Appendable} to append to - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.7 - */ - static long copy(final Reader input, final Appendable output) throws IOException { - return copy(input, output, CharBuffer.allocate(DEFAULT_BUFFER_SIZE)); - } - - /** - * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}. - *- * This method uses the provided buffer, so there is no need to use a - * {@code BufferedReader}. - *
- * - * @param input the {@code Reader} to read from - * @param output the {@code Appendable} to write to - * @param buffer the buffer to be used for the copy - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.7 - */ - static long copy(final Reader input, final Appendable output, final CharBuffer buffer) throws IOException { - long count = 0; - int n; - while (EOF != (n = input.read(buffer))) { - buffer.flip(); - output.append(buffer, 0, n); - count += n; - } - return count; - } - - /** - *- * Copied from Apache Commons IO. - *
- * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}. - *- * This method buffers the input internally, so there is no need to use a - * {@code BufferedReader}. - *
- * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}. - * - * @param input the {@code Reader} to read from - * @param output the {@code Writer} to write to - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 1.3 - */ - static long copyLarge(final Reader input, final Writer output) throws IOException { - return copyLarge(input, output, new char[DEFAULT_BUFFER_SIZE]); - } - - /** - *
- * Copied from Apache Commons IO. - *
- * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}. - *- * This method uses the provided buffer, so there is no need to use a - * {@code BufferedReader}. - *
- * - * @param input the {@code Reader} to read from - * @param output the {@code Writer} to write to - * @param buffer the buffer to be used for the copy - * @return the number of characters copied - * @throws NullPointerException if the input or output is null - * @throws IOException if an I/O error occurs - * @since 2.2 - */ - static long copyLarge(final Reader input, final Writer output, final char[] buffer) throws IOException { - long count = 0; - int n; - while (EOF != (n = input.read(buffer))) { - output.write(buffer, 0, n); - count += n; - } - return count; - } - -} diff --git a/src/test/resources/org/apache/commons/csv/Lexer.java b/src/test/resources/org/apache/commons/csv/Lexer.java deleted file mode 100644 index 2795ca29..00000000 --- a/src/test/resources/org/apache/commons/csv/Lexer.java +++ /dev/null @@ -1,461 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import static org.apache.commons.csv.Constants.BACKSPACE; -import static org.apache.commons.csv.Constants.CR; -import static org.apache.commons.csv.Constants.END_OF_STREAM; -import static org.apache.commons.csv.Constants.FF; -import static org.apache.commons.csv.Constants.LF; -import static org.apache.commons.csv.Constants.TAB; -import static org.apache.commons.csv.Constants.UNDEFINED; -import static org.apache.commons.csv.Token.Type.COMMENT; -import static org.apache.commons.csv.Token.Type.EOF; -import static org.apache.commons.csv.Token.Type.EORECORD; -import static org.apache.commons.csv.Token.Type.INVALID; -import static org.apache.commons.csv.Token.Type.TOKEN; - -import java.io.Closeable; -import java.io.IOException; - -/** - * Lexical analyzer. - */ -final class Lexer implements Closeable { - - private static final String CR_STRING = Character.toString(CR); - private static final String LF_STRING = Character.toString(LF); - - /** - * Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it - * won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two - * chars (using surrogates) and thus there should never be a collision with a real text char. - */ - private static final char DISABLED = '\ufffe'; - - private final char delimiter; - private final char escape; - private final char quoteChar; - private final char commentStart; - - private final boolean ignoreSurroundingSpaces; - private final boolean ignoreEmptyLines; - - /** The input stream */ - private final ExtendedBufferedReader reader; - private String firstEol; - - Lexer(final CSVFormat format, final ExtendedBufferedReader reader) { - this.reader = reader; - this.delimiter = format.getDelimiter(); - this.escape = mapNullToDisabled(format.getEscapeCharacter()); - this.quoteChar = mapNullToDisabled(format.getQuoteCharacter()); - this.commentStart = mapNullToDisabled(format.getCommentMarker()); - this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces(); - this.ignoreEmptyLines = format.getIgnoreEmptyLines(); - } - - /** - * Closes resources. - * - * @throws IOException - * If an I/O error occurs - */ - @Override - public void close() throws IOException { - reader.close(); - } - - /** - * Returns the current character position - * - * @return the current character position - */ - long getCharacterPosition() { - return reader.getPosition(); - } - - /** - * Returns the current line number - * - * @return the current line number - */ - long getCurrentLineNumber() { - return reader.getCurrentLineNumber(); - } - - String getFirstEol(){ - return firstEol; - } - - boolean isClosed() { - return reader.isClosed(); - } - - boolean isCommentStart(final int ch) { - return ch == commentStart; - } - - boolean isDelimiter(final int ch) { - return ch == delimiter; - } - - /** - * @return true if the given character indicates end of file - */ - boolean isEndOfFile(final int ch) { - return ch == END_OF_STREAM; - } - - boolean isEscape(final int ch) { - return ch == escape; - } - - private boolean isMetaChar(final int ch) { - return ch == delimiter || - ch == escape || - ch == quoteChar || - ch == commentStart; - } - - boolean isQuoteChar(final int ch) { - return ch == quoteChar; - } - - /** - * Checks if the current character represents the start of a line: a CR, LF or is at the start of the file. - * - * @param ch the character to check - * @return true if the character is at the start of a line. - */ - boolean isStartOfLine(final int ch) { - return ch == LF || ch == CR || ch == UNDEFINED; - } - - /** - * @return true if the given char is a whitespace character - */ - boolean isWhitespace(final int ch) { - return !isDelimiter(ch) && Character.isWhitespace((char) ch); - } - - private char mapNullToDisabled(final Character c) { - return c == null ? DISABLED : c.charValue(); - } - - /** - * Returns the next token. - *
- * A token corresponds to a term, a record change or an end-of-file indicator. - *
- * - * @param token - * an existing Token object to reuse. The caller is responsible to initialize the Token. - * @return the next token found - * @throws java.io.IOException - * on stream access error - */ - Token nextToken(final Token token) throws IOException { - - // get the last read char (required for empty line detection) - int lastChar = reader.getLastChar(); - - // read the next char and set eol - int c = reader.read(); - /* - * Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF - * - they are equivalent here. - */ - boolean eol = readEndOfLine(c); - - // empty line detection: eol AND (last char was EOL or beginning) - if (ignoreEmptyLines) { - while (eol && isStartOfLine(lastChar)) { - // go on char ahead ... - lastChar = c; - c = reader.read(); - eol = readEndOfLine(c); - // reached end of file without any content (empty line at the end) - if (isEndOfFile(c)) { - token.type = EOF; - // don't set token.isReady here because no content - return token; - } - } - } - - // did we reach eof during the last iteration already ? EOF - if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) { - token.type = EOF; - // don't set token.isReady here because no content - return token; - } - - if (isStartOfLine(lastChar) && isCommentStart(c)) { - final String line = reader.readLine(); - if (line == null) { - token.type = EOF; - // don't set token.isReady here because no content - return token; - } - final String comment = line.trim(); - token.content.append(comment); - token.type = COMMENT; - return token; - } - - // important: make sure a new char gets consumed in each iteration - while (token.type == INVALID) { - // ignore whitespaces at beginning of a token - if (ignoreSurroundingSpaces) { - while (isWhitespace(c) && !eol) { - c = reader.read(); - eol = readEndOfLine(c); - } - } - - // ok, start of token reached: encapsulated, or token - if (isDelimiter(c)) { - // empty token return TOKEN("") - token.type = TOKEN; - } else if (eol) { - // empty token return EORECORD("") - // noop: token.content.append(""); - token.type = EORECORD; - } else if (isQuoteChar(c)) { - // consume encapsulated token - parseEncapsulatedToken(token); - } else if (isEndOfFile(c)) { - // end of file return EOF() - // noop: token.content.append(""); - token.type = EOF; - token.isReady = true; // there is data at EOF - } else { - // next token must be a simple token - // add removed blanks when not ignoring whitespace chars... - parseSimpleToken(token, c); - } - } - return token; - } - - /** - * Parses an encapsulated token. - * - * Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included - * in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after - * an encapsulated token are ignored. The token is finished when one of the following conditions become true: - *CSV are widely used as interfaces to legacy systems or manual data-imports. - * CSV stands for "Comma Separated Values" (or sometimes "Character Separated - * Values"). The CSV data format is defined in - * RFC 4180 - * but many dialects exist.
- * - *Common to all file dialects is its basic structure: The CSV data-format - * is record oriented, whereas each record starts on a new textual line. A - * record is build of a list of values. Keep in mind that not all records - * must have an equal number of values:
- *- * csv := records* - * record := values* - *- * - *
The following list contains the CSV aspects the Commons CSV parser supports:
- *In addition to individually defined dialects, two predefined dialects (strict-csv, and excel-csv) - * can be set directly.
- * - *Example usage:
- *- */ - -package org.apache.commons.csv;- * Reader in = new StringReader("a,b,c"); - * for (CSVRecord record : CSVFormat.DEFAULT.parse(in)) { - * for (String field : record) { - * System.out.print("\"" + field + "\", "); - * } - * System.out.println(); - * } - *