Remove odd cruft.

This commit is contained in:
Gary Gregory 2021-02-22 21:41:21 -05:00
parent 29459edcdf
commit 91db4ff0b8
11 changed files with 0 additions and 4844 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,715 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import static org.apache.commons.csv.Token.Type.TOKEN;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.TreeMap;
/**
* Parses CSV files according to the specified format.
*
* Because CSV appears in many different dialects, the parser supports many formats by allowing the
* specification of a {@link CSVFormat}.
*
* The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream.
*
* <h2>Creating instances</h2>
* <p>
* There are several static factory methods that can be used to create instances for various types of resources:
* </p>
* <ul>
* <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
* <li>{@link #parse(String, CSVFormat)}</li>
* <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
* </ul>
* <p>
* Alternatively, parsers can also be created by passing a {@link Reader} directly to one of the constructors.
*
* For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
* </p>
* <pre>
* for(CSVRecord record : CSVFormat.EXCEL.parse(in)) {
* ...
* }
* </pre>
*
* <h2>Parsing record wise</h2>
* <p>
* To parse a CSV input from a file, you write:
* </p>
*
* <pre>
* File csvData = new File(&quot;/path/to/csv&quot;);
* CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.RFC4180);
* for (CSVRecord csvRecord : parser) {
* ...
* }
* </pre>
*
* <p>
* This will read and parse the contents of the file using the
* <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
* </p>
*
* <p>
* To parse CSV input in a format like Excel, you write:
* </p>
*
* <pre>
* CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.EXCEL);
* for (CSVRecord csvRecord : parser) {
* ...
* }
* </pre>
*
* <p>
* If the predefined formats don't match the format at hand, custom formats can be defined. More information about
* customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
* </p>
*
* <h2>Parsing into memory</h2>
* <p>
* If parsing record wise is not desired, the contents of the input can be read completely into memory.
* </p>
*
* <pre>
* Reader in = new StringReader(&quot;a;b\nc;d&quot;);
* CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
* List&lt;CSVRecord&gt; list = parser.getRecords();
* </pre>
*
* <p>
* There are two constraints that have to be kept in mind:
* </p>
*
* <ol>
* <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
* the input, those records will not end up in the in memory representation of your CSV data.</li>
* <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're
* parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
* </ol>
*
* <h2>Notes</h2>
* <p>
* Internal parser state is completely covered by the format and the reader-state.
* </p>
*
* @see <a href="package-summary.html">package documentation for more details</a>
*/
public final class CSVParser implements Iterable<CSVRecord>, Closeable {
/**
 * Iterator over the records produced by the enclosing parser.
 *
 * <p>
 * At most one record is buffered: {@link #hasNext()} fetches it and {@link #next()} hands it out. Removal is not
 * supported.
 * </p>
 */
class CSVRecordIterator implements Iterator<CSVRecord> {
    /** Record fetched ahead by {@link #hasNext()} and not yet handed out; null when nothing is buffered. */
    private CSVRecord current;

    /**
     * Reads the next record from the parser, converting an I/O failure into an unchecked exception.
     *
     * @return the next record, or null if the parser has none left
     * @throws IllegalStateException wrapping any {@link IOException} raised while parsing
     */
    private CSVRecord getNextRecord() {
        try {
            return CSVParser.this.nextRecord();
        } catch (final IOException e) {
            throw new IllegalStateException(
                    e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
        }
    }

    @Override
    public boolean hasNext() {
        if (CSVParser.this.isClosed()) {
            return false;
        }
        if (current != null) {
            // A record is already buffered from an earlier call.
            return true;
        }
        current = getNextRecord();
        return current != null;
    }

    @Override
    public CSVRecord next() {
        if (CSVParser.this.isClosed()) {
            throw new NoSuchElementException("CSVParser has been closed");
        }
        final CSVRecord buffered = current;
        current = null;
        if (buffered != null) {
            return buffered;
        }
        // hasNext() wasn't called before, so nothing is buffered yet
        final CSVRecord fetched = getNextRecord();
        if (fetched == null) {
            throw new NoSuchElementException("No more CSV records available");
        }
        return fetched;
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
/**
 * Immutable holder pairing the header name list with the name-to-position lookup.
 */
private static final class Headers {
    /**
     * Header names in column order
     */
    final List<String> headerNames;

    /**
     * Header column positions (0-based), keyed by header name
     */
    final Map<String, Integer> headerMap;

    /**
     * Stores both views of the header as given; no copies are made.
     *
     * @param headerMap header name to 0-based column index
     * @param headerNames header names in column order
     */
    Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
        this.headerNames = headerNames;
        this.headerMap = headerMap;
    }
}
/**
 * Creates a parser for the given {@link File}.
 *
 * <p>
 * The file is opened by this method. If creating the parser fails, the file stream is closed again before the
 * failure is propagated; otherwise the caller must close the returned parser to release the file.
 * </p>
 *
 * @param file
 *            a CSV file. Must not be null.
 * @param charset
 *            The Charset to decode the given file. Must not be null.
 * @param format
 *            the CSVFormat used for CSV parsing. Must not be null.
 * @return a new parser
 * @throws NullPointerException
 *             If file, charset or format is null.
 * @throws IllegalArgumentException
 *             If the parameters of the format are inconsistent.
 * @throws IOException
 *             If an I/O error occurs
 */
@SuppressWarnings("resource")
public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
    Objects.requireNonNull(file, "file");
    Objects.requireNonNull(format, "format");
    final InputStream inputStream = new FileInputStream(file);
    try {
        return new CSVParser(new InputStreamReader(inputStream, charset), format);
    } catch (final IOException | RuntimeException e) {
        // No parser is returned on failure, so nobody else could ever close the stream opened above.
        try {
            inputStream.close();
        } catch (final IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
/**
 * Creates a CSV parser that decodes the given stream with the given charset and parses it using the given
 * {@link CSVFormat}.
 *
 * <p>
 * If you do not read all records from the given {@code inputStream}, you should call {@link #close()} on the
 * parser, unless you close the {@code inputStream} yourself.
 * </p>
 *
 * @param inputStream
 *            an InputStream containing CSV-formatted input. Must not be null.
 * @param charset
 *            The Charset to decode the given stream.
 * @param format
 *            the CSVFormat used for CSV parsing. Must not be null.
 * @return a new CSVParser configured with the given stream and format.
 * @throws NullPointerException
 *             If either inputStream or format are null.
 * @throws IllegalArgumentException
 *             If the parameters of the format are inconsistent.
 * @throws IOException
 *             If there is a problem reading the header or skipping the first record
 * @since 1.5
 */
@SuppressWarnings("resource")
public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
        throws IOException {
    Objects.requireNonNull(inputStream, "inputStream");
    Objects.requireNonNull(format, "format");
    final Reader decodingReader = new InputStreamReader(inputStream, charset);
    return parse(decodingReader, format);
}
/**
 * Creates and returns a parser for the given {@link Path}, which the caller MUST close.
 *
 * <p>
 * The file is opened by this method. If creating the parser fails, the file stream is closed again before the
 * failure is propagated.
 * </p>
 *
 * @param path
 *            a CSV file. Must not be null.
 * @param charset
 *            The Charset to decode the given file. Must not be null.
 * @param format
 *            the CSVFormat used for CSV parsing. Must not be null.
 * @return a new parser
 * @throws NullPointerException
 *             If path, charset or format is null.
 * @throws IllegalArgumentException
 *             If the parameters of the format are inconsistent.
 * @throws IOException
 *             If an I/O error occurs
 * @since 1.5
 */
@SuppressWarnings("resource")
public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
    Objects.requireNonNull(path, "path");
    Objects.requireNonNull(format, "format");
    final InputStream inputStream = Files.newInputStream(path);
    try {
        return parse(inputStream, charset, format);
    } catch (final IOException | RuntimeException e) {
        // No parser is returned on failure, so nobody else could ever close the stream opened above.
        try {
            inputStream.close();
        } catch (final IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
/**
 * Static-factory equivalent of {@link #CSVParser(Reader, CSVFormat)}.
 *
 * <p>
 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
 * unless you close the {@code reader}.
 * </p>
 *
 * @param reader
 *            a Reader containing CSV-formatted input. Must not be null.
 * @param format
 *            the CSVFormat used for CSV parsing. Must not be null.
 * @return a new CSVParser configured with the given reader and format.
 * @throws NullPointerException
 *             If either reader or format are null.
 * @throws IllegalArgumentException
 *             If the parameters of the format are inconsistent.
 * @throws IOException
 *             If there is a problem reading the header or skipping the first record
 * @since 1.5
 */
public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
    final CSVParser parser = new CSVParser(reader, format);
    return parser;
}
// the following objects are shared to reduce garbage
/**
 * Creates a parser that reads its CSV input from the given {@link String}.
 *
 * @param string
 *            a CSV string. Must not be null.
 * @param format
 *            the CSVFormat used for CSV parsing. Must not be null.
 * @return a new parser
 * @throws NullPointerException
 *             If either string or format are null.
 * @throws IllegalArgumentException
 *             If the parameters of the format are inconsistent.
 * @throws IOException
 *             If an I/O error occurs
 */
public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
    Objects.requireNonNull(string, "string");
    Objects.requireNonNull(format, "format");
    final Reader inMemoryReader = new StringReader(string);
    return new CSVParser(inMemoryReader, format);
}
/**
 * Creates and returns a parser for the given URL, which the caller MUST close.
 *
 * <p>
 * The URL stream is opened by this method. If creating the parser fails, that stream is closed again before the
 * failure is propagated; otherwise closing the returned parser releases it.
 * </p>
 *
 * @param url
 *            a URL. Must not be null.
 * @param charset
 *            the charset for the resource. Must not be null.
 * @param format
 *            the CSVFormat used for CSV parsing. Must not be null.
 * @return a new parser
 * @throws NullPointerException
 *             If either url, charset or format are null.
 * @throws IllegalArgumentException
 *             If the parameters of the format are inconsistent.
 * @throws IOException
 *             If an I/O error occurs
 */
@SuppressWarnings("resource")
public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
    Objects.requireNonNull(url, "url");
    Objects.requireNonNull(charset, "charset");
    Objects.requireNonNull(format, "format");
    final InputStream inputStream = url.openStream();
    try {
        return new CSVParser(new InputStreamReader(inputStream, charset), format);
    } catch (final IOException | RuntimeException e) {
        // No parser is returned on failure, so nobody else could ever close the stream opened above.
        try {
            inputStream.close();
        } catch (final IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
/** The format that controls value post-processing (trimming, null string) and header handling. */
private final CSVFormat format;
/** A mapping of column names to column indices; {@code null} when the format defines no header. */
private final Map<String, Integer> headerMap;
/** The header names in column order, kept to avoid re-computing it; unmodifiable and possibly empty. */
private final List<String> headerNames;
/** The lexer that produces the tokens consumed by {@link #nextRecord()}. */
private final Lexer lexer;
/** The iterator instance handed out by {@link #iterator()}. */
private final CSVRecordIterator csvRecordIterator;
/** A value buffer for {@link #nextRecord()}. Grows as necessary and is cleared and reused for every record. */
private final List<String> recordList = new ArrayList<>();
/**
* The number of the most recently created record; incremented just before each new record is created.
*/
private long recordNumber;
/**
* Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
* with {@link #recordNumber}.
*/
private final long characterOffset;
/** The token instance that is reset and refilled for every lexer call in {@link #nextRecord()}. */
private final Token reusableToken = new Token();
/**
 * Creates a parser over the given reader that starts at character offset 0 and numbers its first record 1.
 *
 * <p>
 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
 * unless you close the {@code reader}.
 * </p>
 *
 * @param reader
 *            a Reader containing CSV-formatted input. Must not be null.
 * @param format
 *            the CSVFormat used for CSV parsing. Must not be null.
 * @throws NullPointerException
 *             If either reader or format are null.
 * @throws IllegalArgumentException
 *             If the parameters of the format are inconsistent.
 * @throws IOException
 *             If there is a problem reading the header or skipping the first record
 */
public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
    this(reader, format, 0, 1);
}
/**
* Customized CSV parser using the given {@link CSVFormat}
*
* <p>
* If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
* unless you close the {@code reader}.
* </p>
*
* <p>
* Header handling required by the format happens during construction, so a first record may already have been
* consumed from {@code reader} when this constructor returns.
* </p>
*
* @param reader
* a Reader containing CSV-formatted input. Must not be null.
* @param format
* the CSVFormat used for CSV parsing. Must not be null.
* @param characterOffset
* Lexer offset when the parser does not start parsing at the beginning of the source.
* @param recordNumber
* The next record number to assign
* @throws NullPointerException
* If either reader or format are null.
* @throws IllegalArgumentException
* If the parameters of the format are inconsistent.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @since 1.1
*/
@SuppressWarnings("resource")
public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
throws IOException {
Objects.requireNonNull(reader, "reader");
Objects.requireNonNull(format, "format");
this.format = format;
this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
this.csvRecordIterator = new CSVRecordIterator();
// Header creation may call nextRecord(), which increments this.recordNumber. The counter is assigned only
// afterwards, so a consumed header record does not shift the numbering of the data records.
final Headers headers = createHeaders();
this.headerMap = headers.headerMap;
this.headerNames = headers.headerNames;
this.characterOffset = characterOffset;
// Stored as "last assigned" number: nextRecord() increments before creating a record.
this.recordNumber = recordNumber - 1;
}
/**
 * Appends the content of the current token to the record buffer.
 *
 * <p>
 * The content is trimmed if the format asks for it, and is stored as {@code null} when it equals the format's
 * null string. An empty final value is dropped when the format declares a trailing delimiter.
 * </p>
 *
 * @param lastRecord whether the token is the last value of the record
 */
private void addRecordValue(final boolean lastRecord) {
    final String raw = this.reusableToken.content.toString();
    final String value = this.format.getTrim() ? raw.trim() : raw;
    final boolean trailingDelimiterArtifact =
            lastRecord && value.isEmpty() && this.format.getTrailingDelimiter();
    if (!trailingDelimiterArtifact) {
        final String nullString = this.format.getNullString();
        if (value.equals(nullString)) {
            this.recordList.add(null);
        } else {
            this.recordList.add(value);
        }
    }
}
/**
 * Closes the lexer held by this parser.
 *
 * @throws IOException
 *             If an I/O error occurs
 */
@Override
public void close() throws IOException {
    if (this.lexer == null) {
        return;
    }
    this.lexer.close();
}
/**
 * Creates an empty map suited to hold header names: case-insensitive when the format ignores header case,
 * insertion-ordered otherwise.
 *
 * @return a new, empty, mutable map
 */
private Map<String, Integer> createEmptyHeaderMap() {
    if (this.format.getIgnoreHeaderCase()) {
        return new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
    }
    return new LinkedHashMap<>();
}
/**
* Creates the name to index mapping and the ordered header name list if the format defines a header.
*
* <p>
* When the format's header array is empty, the header is read from the first record of the input. Otherwise the
* format's header is used, and the first record is skipped only if the format requests it.
* </p>
*
* @return the header information, never null; its map is null if the format has no header, and its name list is
* unmodifiable and empty if no non-null header name was found.
* @throws IOException if there is a problem reading the header or skipping the first record
* @throws IllegalArgumentException if a header name is missing or duplicated and the format does not allow that
*/
private Headers createHeaders() throws IOException {
Map<String, Integer> hdrMap = null;
List<String> headerNames = null;
final String[] formatHeader = this.format.getHeader();
if (formatHeader != null) {
hdrMap = createEmptyHeaderMap();
String[] headerRecord = null;
if (formatHeader.length == 0) {
// read the header from the first line of the file
final CSVRecord nextRecord = this.nextRecord();
if (nextRecord != null) {
headerRecord = nextRecord.values();
}
} else {
// the format supplies the names; optionally discard the header line present in the input
if (this.format.getSkipHeaderRecord()) {
this.nextRecord();
}
headerRecord = formatHeader;
}
// build the name to index mappings
if (headerRecord != null) {
for (int i = 0; i < headerRecord.length; i++) {
final String header = headerRecord[i];
final boolean emptyHeader = header == null || header.trim().isEmpty();
if (emptyHeader && !this.format.getAllowMissingColumnNames()) {
throw new IllegalArgumentException(
"A header name is missing in " + Arrays.toString(headerRecord));
}
// Note: This will always allow a duplicate header if the header is empty
final boolean containsHeader = header != null && hdrMap.containsKey(header);
if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
throw new IllegalArgumentException(
String.format(
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
header, Arrays.toString(headerRecord)));
}
// null names are skipped entirely; for an allowed duplicate the map keeps the last index
if (header != null) {
hdrMap.put(header, Integer.valueOf(i));
if (headerNames == null) {
headerNames = new ArrayList<>(headerRecord.length);
}
headerNames.add(header);
}
}
}
}
if (headerNames == null) {
headerNames = Collections.emptyList(); //immutable
} else {
headerNames = Collections.unmodifiableList(headerNames);
}
return new Headers(hdrMap, headerNames);
}
/**
 * Gets the line number the lexer currently reports for the input.
 *
 * <p>
 * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
 * the record number.
 * </p>
 *
 * @return current line number
 */
public long getCurrentLineNumber() {
    final long lineNumber = this.lexer.getCurrentLineNumber();
    return lineNumber;
}
/**
 * Returns the first end-of-line string reported by the lexer.
 *
 * @return the first end-of-line string
 * @since 1.5
 */
public String getFirstEndOfLine() {
    final String firstEol = this.lexer.getFirstEol();
    return firstEol;
}
/**
 * Returns a copy of the header map, so changes to the returned map do not affect this parser.
 * <p>
 * The map keys are column names. The map values are 0-based indices.
 * </p>
 * <p>
 * Note: The map can only provide a one-to-one mapping when the format did not
 * contain null or duplicate column names.
 * </p>
 *
 * @return a copy of the header map, or null if this parser has no header map.
 */
public Map<String, Integer> getHeaderMap() {
    final Map<String, Integer> source = this.headerMap;
    if (source != null) {
        final Map<String, Integer> copy = createEmptyHeaderMap();
        copy.putAll(source);
        return copy;
    }
    return null;
}
/**
 * Returns the internal header map itself, without copying it.
 *
 * @return the header map, or null if this parser has none.
 */
Map<String, Integer> getHeaderMapRaw() {
    final Map<String, Integer> raw = this.headerMap;
    return raw;
}
/**
 * Returns the header names in column order as a read-only list.
 * <p>
 * Note: The list provides strings that can be used as keys in the header map.
 * The list will not contain null column names if they were present in the input
 * format.
 * </p>
 *
 * @return read-only list of header names that iterates in column order.
 * @see #getHeaderMap()
 * @since 1.7
 */
public List<String> getHeaderNames() {
    final List<String> names = this.headerNames;
    return names;
}
/**
 * Gets the number of the record most recently parsed from the input stream.
 *
 * <p>
 * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
 * the line number.
 * </p>
 *
 * @return current record number
 */
public long getRecordNumber() {
    final long current = this.recordNumber;
    return current;
}
/**
 * Reads all remaining records of the input into memory and returns them as a list of
 * {@link CSVRecord CSVRecords}.
 *
 * <p>
 * The returned content starts at the current parse-position in the stream.
 * </p>
 *
 * @return list of {@link CSVRecord CSVRecords}, may be empty
 * @throws IOException
 *             on parse error or input read-failure
 */
public List<CSVRecord> getRecords() throws IOException {
    final List<CSVRecord> records = new ArrayList<>();
    for (CSVRecord rec = this.nextRecord(); rec != null; rec = this.nextRecord()) {
        records.add(rec);
    }
    return records;
}
/**
 * Tells whether this parser is closed, as reported by its lexer.
 *
 * @return whether this parser is closed.
 */
public boolean isClosed() {
    final boolean closed = this.lexer.isClosed();
    return closed;
}
/**
 * Returns the iterator over the records of this parser.
 *
 * <p>
 * Every call returns the same iterator instance, so all callers share one position in the input.
 * </p>
 * <p>
 * An {@link IOException} caught during the iteration is re-thrown as an
 * {@link IllegalStateException}.
 * </p>
 * <p>
 * If the parser is closed a call to {@link Iterator#next()} will throw a
 * {@link NoSuchElementException}.
 * </p>
 */
@Override
public Iterator<CSVRecord> iterator() {
    return this.csvRecordIterator;
}
/**
* Parses the next record from the current point in the stream.
*
* <p>
* Tokens are read from the lexer until a token ends the record. Comment tokens met on the way are collected,
* joined by line feeds, and attached to the record that is created.
* </p>
*
* @return the next record, or {@code null} if no value could be read (presumably because the end of the stream
* has been reached)
* @throws IOException
* on parse error or input read-failure
*/
CSVRecord nextRecord() throws IOException {
CSVRecord result = null;
this.recordList.clear();
StringBuilder sb = null;
// captured before any token is read so that it marks the start of this record
final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset;
do {
this.reusableToken.reset();
this.lexer.nextToken(this.reusableToken);
switch (this.reusableToken.type) {
case TOKEN:
this.addRecordValue(false);
break;
case EORECORD:
this.addRecordValue(true);
break;
case EOF:
// only add a final value if the token is flagged as ready
if (this.reusableToken.isReady) {
this.addRecordValue(true);
}
break;
case INVALID:
throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence");
case COMMENT: // Collected and attached to the record as its comment
if (sb == null) { // first comment for this record
sb = new StringBuilder();
} else {
sb.append(Constants.LF);
}
sb.append(this.reusableToken.content);
this.reusableToken.type = TOKEN; // Read another token
break;
default:
throw new IllegalStateException("Unexpected Token type: " + this.reusableToken.type);
}
} while (this.reusableToken.type == TOKEN);
// no record is created, and the record number is not advanced, when no value was collected
if (!this.recordList.isEmpty()) {
this.recordNumber++;
final String comment = sb == null ? null : sb.toString();
result = new CSVRecord(this, this.recordList.toArray(new String[this.recordList.size()]),
comment, this.recordNumber, startCharPosition);
}
return result;
}
}

View File

@ -1,392 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import static org.apache.commons.csv.Constants.CR;
import static org.apache.commons.csv.Constants.LF;
import static org.apache.commons.csv.Constants.SP;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.Reader;
import java.sql.Clob;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.Objects;
/**
* Prints values in a {@link CSVFormat CSV format}.
*
* <p>Values can be appended to the output by calling the {@link #print(Object)} method.
* Values are printed according to {@link String#valueOf(Object)}.
* To complete a record the {@link #println()} method has to be called.
* Comments can be appended by calling {@link #printComment(String)}.
* However a comment will only be written to the output if the {@link CSVFormat} supports comments.
* </p>
*
* <p>The printer also supports appending a complete record at once by calling {@link #printRecord(Object...)}
* or {@link #printRecord(Iterable)}.
* Furthermore {@link #printRecords(Object...)}, {@link #printRecords(Iterable)} and {@link #printRecords(ResultSet)}
* methods can be used to print several records at once.
* </p>
*
* <p>Example:</p>
*
* <pre>
* try (CSVPrinter printer = new CSVPrinter(new FileWriter("csv.txt"), CSVFormat.EXCEL)) {
* printer.printRecord("id", "userName", "firstName", "lastName", "birthday");
* printer.printRecord(1, "john73", "John", "Doe", LocalDate.of(1973, 9, 15));
* printer.println();
* printer.printRecord(2, "mary", "Mary", "Meyer", LocalDate.of(1985, 3, 29));
* } catch (IOException ex) {
* ex.printStackTrace();
* }
* </pre>
*
* <p>This code will write the following to csv.txt:</p>
* <pre>
* id,userName,firstName,lastName,birthday
* 1,john73,John,Doe,1973-09-15
*
* 2,mary,Mary,Meyer,1985-03-29
* </pre>
*/
public final class CSVPrinter implements Flushable, Closeable {
/** The place that the values get written. */
private final Appendable out;
/** The format that performs the actual output of values, records and record separators. */
private final CSVFormat format;
/** True if we just began a new record. */
private boolean newRecord = true;
/**
* Creates a printer that will print values to the given stream following the CSVFormat.
* <p>
* Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation
* and escaping with a different character) are not supported.
* </p>
*
* @param out
* stream to which to print. Must not be null.
* @param format
* the CSV format. Must not be null.
* @throws IOException
* thrown if the optional header cannot be printed.
* @throws IllegalArgumentException
* thrown if the parameters of the format are inconsistent or if either out or format are null.
*/
public CSVPrinter(final Appendable out, final CSVFormat format) throws IOException {
Objects.requireNonNull(out, "out");
Objects.requireNonNull(format, "format");
this.out = out;
this.format = format;
// TODO: Is it a good idea to do this here instead of on the first call to a print method?
// It seems a pain to have to track whether the header has already been printed or not.
if (format.getHeaderComments() != null) {
for (final String line : format.getHeaderComments()) {
if (line != null) {
this.printComment(line);
}
}
}
if (format.getHeader() != null && !format.getSkipHeaderRecord()) {
this.printRecord((Object[]) format.getHeader());
}
}
// ======================================================
// printing implementation
// ======================================================
/**
 * Closes the underlying stream without forcing a flush first.
 *
 * @throws IOException
 *             If an I/O error occurs
 */
@Override
public void close() throws IOException {
    this.close(false);
}
/**
 * Closes the underlying stream, flushing it first if requested or if the format enables auto-flush.
 *
 * <p>
 * A target that is not {@link Closeable} is left untouched.
 * </p>
 *
 * @param flush whether to flush before the actual close.
 * @throws IOException
 *             If an I/O error occurs
 * @since 1.6
 */
public void close(final boolean flush) throws IOException {
    final boolean flushFirst = flush || this.format.getAutoFlush();
    if (flushFirst) {
        this.flush();
    }
    if (!(this.out instanceof Closeable)) {
        return;
    }
    ((Closeable) this.out).close();
}
/**
 * Flushes the underlying stream if it is {@link Flushable}; otherwise does nothing.
 *
 * @throws IOException
 *             If an I/O error occurs
 */
@Override
public void flush() throws IOException {
    if (!(this.out instanceof Flushable)) {
        return;
    }
    ((Flushable) this.out).flush();
}
/**
 * Returns the Appendable this printer writes to.
 *
 * @return the target Appendable.
 */
public Appendable getOut() {
    final Appendable target = this.out;
    return target;
}
/**
 * Prints the given value as the next value of the current record, delegating the formatting to the CSV format.
 *
 * @param value
 *            value to be output.
 * @throws IOException
 *             If an I/O error occurs
 */
public void print(final Object value) throws IOException {
    this.format.print(value, this.out, this.newRecord);
    // Any further value belongs to the record just started.
    this.newRecord = false;
}
/**
 * Prints a comment on a new line among the delimiter separated values.
 *
 * <p>
 * Comments will always begin on a new line and occupy at least one full line. The character specified to start
 * comments and a space will be inserted at the beginning of each new line in the comment.
 * </p>
 *
 * <p>
 * If comments are disabled in the current CSV format, or if {@code comment} is {@code null}, this method does
 * nothing.
 * </p>
 *
 * <p>This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()}
 * to start a new line of the comment. Note that this might produce unexpected results for formats that do not use
 * line breaks as record separator.</p>
 *
 * @param comment
 *            the comment to output, may be null
 * @throws IOException
 *             If an I/O error occurs
 */
public void printComment(final String comment) throws IOException {
    // Reject null before anything is written: otherwise the marker would be emitted and then an NPE thrown.
    if (comment == null || !format.isCommentMarkerSet()) {
        return;
    }
    if (!newRecord) {
        println();
    }
    final char commentMarker = format.getCommentMarker().charValue();
    out.append(commentMarker);
    out.append(SP);
    final int commentLength = comment.length();
    for (int i = 0; i < commentLength; i++) {
        final char c = comment.charAt(i);
        switch (c) {
        case CR:
            // Treat CR LF as a single line break.
            if (i + 1 < commentLength && comment.charAt(i + 1) == LF) {
                i++;
            }
            //$FALL-THROUGH$ break intentionally excluded.
        case LF:
            println();
            out.append(commentMarker);
            out.append(SP);
            break;
        default:
            out.append(c);
            break;
        }
    }
    println();
}
/**
 * Ends the current record by printing the record separator.
 *
 * @throws IOException
 *             If an I/O error occurs
 */
public void println() throws IOException {
    this.format.println(this.out);
    // Whatever is printed next starts a new record.
    this.newRecord = true;
}
/**
 * Prints the given values as a single record of delimiter separated values followed by the record separator.
 *
 * <p>
 * Each element is printed with {@link #print(Object)}. This method adds the record separator to the output after
 * printing the record, so there is no need to call {@link #println()}.
 * </p>
 *
 * @param values
 *            values to output.
 * @throws IOException
 *             If an I/O error occurs
 */
public void printRecord(final Iterable<?> values) throws IOException {
    for (final Object element : values) {
        this.print(element);
    }
    this.println();
}
/**
 * Prints the given values as a single record of delimiter separated values followed by the record separator.
 *
 * <p>
 * The whole record is written by the CSV format. There is no need to call {@link #println()} afterwards.
 * </p>
 *
 * @param values
 *            values to output.
 * @throws IOException
 *             If an I/O error occurs
 */
public void printRecord(final Object... values) throws IOException {
    this.format.printRecord(this.out, values);
    // Whatever is printed next starts a new record.
    this.newRecord = true;
}
/**
 * Prints every element of the given collection as one record.
 *
 * <p>
 * An element that is an array is printed with {@link #printRecord(Object...)}, an element that is an
 * {@link Iterable} with {@link #printRecord(Iterable)}, and any other element as a record holding that single
 * value.
 * </p>
 *
 * <p>
 * Given the following data structure:
 * </p>
 *
 * <pre>
 * <code>
 * List&lt;String[]&gt; data = ...
 * data.add(new String[]{ "A", "B", "C" });
 * data.add(new String[]{ "1", "2", "3" });
 * data.add(new String[]{ "A1", "B2", "C3" });
 * </code>
 * </pre>
 *
 * <p>
 * Calling this method will print:
 * </p>
 *
 * <pre>
 * <code>
 * A, B, C
 * 1, 2, 3
 * A1, B2, C3
 * </code>
 * </pre>
 *
 * @param values
 *            the values to print.
 * @throws IOException
 *             If an I/O error occurs
 */
public void printRecords(final Iterable<?> values) throws IOException {
    for (final Object element : values) {
        if (element instanceof Object[]) {
            printRecord((Object[]) element);
            continue;
        }
        if (element instanceof Iterable) {
            printRecord((Iterable<?>) element);
            continue;
        }
        // A plain value becomes a record with exactly one value.
        printRecord(element);
    }
}
/**
 * Prints every element of the given array as one record, exactly as {@link #printRecords(Iterable)} does for the
 * elements of a collection.
 *
 * <p>
 * Given the following data structure:
 * </p>
 *
 * <pre>
 * <code>
 * String[][] data = new String[3][];
 * data[0] = new String[]{ "A", "B", "C" };
 * data[1] = new String[]{ "1", "2", "3" };
 * data[2] = new String[]{ "A1", "B2", "C3" };
 * </code>
 * </pre>
 *
 * <p>
 * Calling this method will print:
 * </p>
 *
 * <pre>
 * <code>
 * A, B, C
 * 1, 2, 3
 * A1, B2, C3
 * </code>
 * </pre>
 *
 * @param values
 *            the values to print.
 * @throws IOException
 *             If an I/O error occurs
 */
public void printRecords(final Object... values) throws IOException {
    this.printRecords(Arrays.asList(values));
}
/**
 * Prints all the objects in the given JDBC result set, one record per row.
 *
 * <p>
 * A {@link Clob} column is printed from its character stream; that stream is closed as soon as the value has been
 * printed. The result set itself is not closed.
 * </p>
 *
 * @param resultSet
 *            result set the values to print.
 * @throws IOException
 *             If an I/O error occurs
 * @throws SQLException
 *             if a database access error occurs
 */
public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
    final int columnCount = resultSet.getMetaData().getColumnCount();
    while (resultSet.next()) {
        // JDBC columns are 1-based.
        for (int i = 1; i <= columnCount; i++) {
            final Object object = resultSet.getObject(i);
            if (object instanceof Clob) {
                // The stream is obtained here, so it is released here once its content has been printed.
                try (Reader reader = ((Clob) object).getCharacterStream()) {
                    print(reader);
                }
            } else {
                print(object);
            }
        }
        println();
    }
}
}

View File

@ -1,329 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
/**
 * A single record (row of values) parsed from a CSV file.
 *
 * <p>
 * Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
 * In version 1.8 the mapping between the column header and the column index was
 * removed from the serialised state. The class maintains serialization compatibility
 * with versions pre-1.8 for the record values; these must be accessed by index
 * following deserialization. There will be loss of any functionality linked to the header
 * mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa.
 * </p>
 */
public final class CSVRecord implements Serializable, Iterable<String> {

    /** Shared empty array used when the record is created without values. */
    private static final String[] EMPTY_STRING_ARRAY = new String[0];

    private static final long serialVersionUID = 1L;

    /** The start position of this record as a character position in the source stream. */
    private final long characterPosition;

    /** The accumulated comments (if any). */
    private final String comment;

    /** The record number. */
    private final long recordNumber;

    /** The values of the record; never null. */
    private final String[] values;

    /** The parser that originated this record. This is not serialized. */
    private final transient CSVParser parser;

    /**
     * Creates a record.
     *
     * @param parser
     *            the originating parser, used to look up the header mapping; may be null
     * @param values
     *            the record values; null is replaced by an empty array
     * @param comment
     *            the comment attached to this record, or null
     * @param recordNumber
     *            the number of this record
     * @param characterPosition
     *            the character position at which this record starts
     */
    CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber,
            final long characterPosition) {
        this.parser = parser;
        this.comment = comment;
        this.recordNumber = recordNumber;
        this.characterPosition = characterPosition;
        if (values == null) {
            this.values = EMPTY_STRING_ARRAY;
        } else {
            this.values = values;
        }
    }

    /**
     * Returns a value by {@link Enum}; the column name looked up is the enum's {@code toString()} value.
     *
     * @param e
     *            an enum
     * @return the String at the column named by the given enum
     */
    public String get(final Enum<?> e) {
        final String columnName = e == null ? null : e.toString();
        return get(columnName);
    }

    /**
     * Returns a value by index.
     *
     * @param i
     *            a column index (0-based)
     * @return the String at the given index
     */
    public String get(final int i) {
        return values[i];
    }

    /**
     * Returns a value by column name.
     *
     * <p>
     * Note: This requires a field mapping obtained from the original parser.
     * A check using {@link #isMapped(String)} should be used to determine if a
     * mapping exists from the provided {@code name} to a field index. In this case an
     * exception will only be thrown if the record does not contain a field corresponding
     * to the mapping, that is the record length is not consistent with the mapping size.
     * </p>
     *
     * @param name
     *            the name of the column to be retrieved.
     * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}.
     * @throws IllegalStateException
     *             if no header mapping was provided
     * @throws IllegalArgumentException
     *             if {@code name} is not mapped or if the record is inconsistent
     * @see #isMapped(String)
     * @see #isConsistent()
     * @see #getParser()
     * @see CSVFormat#withNullString(String)
     */
    public String get(final String name) {
        final Map<String, Integer> mapping = getHeaderMapRaw();
        if (mapping == null) {
            throw new IllegalStateException(
                    "No header mapping was specified, the record values can't be accessed by name");
        }
        final Integer column = mapping.get(name);
        if (column == null) {
            throw new IllegalArgumentException(
                    String.format("Mapping for %s not found, expected one of %s", name, mapping.keySet()));
        }
        // A mapped index outside the value array means the record is shorter than the header.
        final int position = column.intValue();
        if (position < 0 || position >= values.length) {
            throw new IllegalArgumentException(String.format(
                    "Index for header '%s' is %d but CSVRecord only has %d values!", name, column,
                    Integer.valueOf(values.length)));
        }
        return values[position];
    }

    /**
     * Returns the start position of this record as a character position in the source stream. This may or may not
     * correspond to the byte position depending on the character set.
     *
     * @return the position of this record in the source stream.
     */
    public long getCharacterPosition() {
        return characterPosition;
    }

    /**
     * Returns the comment for this record, if any.
     * Note that comments are attached to the following record.
     * If there is no following record (i.e. the comment is at EOF)
     * the comment will be ignored.
     *
     * @return the comment for this record, or null if no comment for this record is available.
     */
    public String getComment() {
        return comment;
    }

    /**
     * Looks up the header mapping of the originating parser.
     *
     * @return the parser's raw header map, or null when this record has no parser
     */
    private Map<String, Integer> getHeaderMapRaw() {
        if (parser == null) {
            return null;
        }
        return parser.getHeaderMapRaw();
    }

    /**
     * Returns the parser.
     *
     * <p>
     * Note: The parser is not part of the serialized state of the record. A null check
     * should be used when the record may have originated from a serialized form.
     * </p>
     *
     * @return the parser.
     * @since 1.7
     */
    public CSVParser getParser() {
        return parser;
    }

    /**
     * Returns the number of this record in the parsed CSV file.
     *
     * <p>
     * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
     * the current line number of the parser that created this record.
     * </p>
     *
     * @return the number of this record.
     * @see CSVParser#getCurrentLineNumber()
     */
    public long getRecordNumber() {
        return recordNumber;
    }

    /**
     * Checks whether this record has a comment.
     * Note that comments are attached to the following record.
     * If there is no following record (i.e. the comment is at EOF)
     * the comment will be ignored.
     *
     * @return true if this record has a comment, false otherwise
     * @since 1.3
     */
    public boolean hasComment() {
        return comment != null;
    }

    /**
     * Tells whether the record size matches the header size.
     *
     * <p>
     * Returns true if there is no header mapping or if the number of values equals the number of mapped headers,
     * and false otherwise. Some programs can export files that fail this test but still produce parsable files.
     * </p>
     *
     * @return true if this record is valid, false if not
     */
    public boolean isConsistent() {
        final Map<String, Integer> mapping = getHeaderMapRaw();
        if (mapping == null) {
            return true;
        }
        return mapping.size() == values.length;
    }

    /**
     * Checks whether a given column is mapped, i.e. its name has been defined to the parser.
     *
     * @param name
     *            the name of the column to be retrieved.
     * @return whether a given column is mapped.
     */
    public boolean isMapped(final String name) {
        final Map<String, Integer> mapping = getHeaderMapRaw();
        if (mapping == null) {
            return false;
        }
        return mapping.containsKey(name);
    }

    /**
     * Checks whether a column with given index has a value.
     *
     * @param index
     *            a column index (0-based)
     * @return whether a column with given index has a value
     */
    public boolean isSet(final int index) {
        return index >= 0 && index < values.length;
    }

    /**
     * Checks whether a given column is mapped and has a value.
     *
     * @param name
     *            the name of the column to be retrieved.
     * @return whether a given column is mapped and has a value
     */
    public boolean isSet(final String name) {
        if (!isMapped(name)) {
            return false;
        }
        final int column = getHeaderMapRaw().get(name).intValue();
        return column < values.length;
    }

    /**
     * Returns an iterator over the values of this record.
     *
     * @return an iterator over the values of this record.
     */
    @Override
    public Iterator<String> iterator() {
        final List<String> view = toList();
        return view.iterator();
    }

    /**
     * Puts all values of this record into the given Map, keyed by header name. Headers whose column index lies
     * beyond the values of this record are skipped.
     *
     * @param <M>
     *            the map type
     * @param map
     *            The Map to populate.
     * @return the given map.
     * @since 1.9.0
     */
    public <M extends Map<String, String>> M putIn(final M map) {
        final Map<String, Integer> mapping = getHeaderMapRaw();
        if (mapping != null) {
            for (final Entry<String, Integer> header : mapping.entrySet()) {
                final int column = header.getValue().intValue();
                if (column < values.length) {
                    map.put(header.getKey(), values[column]);
                }
            }
        }
        return map;
    }

    /**
     * Returns the number of values in this record.
     *
     * @return the number of values.
     */
    public int size() {
        return values.length;
    }

    /**
     * Wraps the values in a List.
     *
     * TODO: Maybe make this public?
     *
     * @return a List backed by the values array of this record
     */
    private List<String> toList() {
        return Arrays.asList(values);
    }

    /**
     * Copies this record into a new Map of header name to record value.
     *
     * @return A new Map. The map is empty if the record has no headers.
     */
    public Map<String, String> toMap() {
        final Map<String, String> result = new LinkedHashMap<String, String>(values.length);
        return putIn(result);
    }

    /**
     * Returns a string representation of the contents of this record. The result is built from the comment, the
     * record number and the internal values array as rendered by {@link Arrays#toString(Object[])}.
     *
     * @return a String representation of this record.
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append("CSVRecord [comment='").append(comment);
        text.append("', recordNumber=").append(recordNumber);
        text.append(", values=").append(Arrays.toString(values));
        text.append("]");
        return text.toString();
    }

    /**
     * Gives package-level access to the internal values array (not a copy).
     *
     * @return the values array of this record
     */
    String[] values() {
        return values;
    }
}

View File

@ -1,82 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
/**
 * Constants for this package.
 *
 * <p>
 * This class only holds constants and cannot be instantiated.
 * </p>
 */
final class Constants {

    /** The backslash character. */
    static final char BACKSLASH = '\\';

    /** The backspace control character. */
    static final char BACKSPACE = '\b';

    /** The comma character. */
    static final char COMMA = ',';

    /**
     * Starts a comment, the remainder of the line is the comment.
     */
    static final char COMMENT = '#';

    /** The carriage return character. */
    static final char CR = '\r';

    /** RFC 4180 defines line breaks as CRLF */
    static final String CRLF = "\r\n";

    /** The double quote character, boxed as a {@link Character}. */
    static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"');

    /** The empty String. */
    static final String EMPTY = "";

    /** The end of stream symbol */
    static final int END_OF_STREAM = -1;

    /** The form feed control character. */
    static final char FF = '\f';

    /** The line feed character. */
    static final char LF = '\n';

    /**
     * Unicode line separator (U+2028).
     */
    static final String LINE_SEPARATOR = "\u2028";

    /**
     * Unicode next line (U+0085).
     */
    static final String NEXT_LINE = "\u0085";

    /**
     * Unicode paragraph separator (U+2029).
     */
    static final String PARAGRAPH_SEPARATOR = "\u2029";

    /** The pipe character. */
    static final char PIPE = '|';

    /** ASCII record separator */
    static final char RS = 30;

    /** The space character. */
    static final char SP = ' ';

    /** The horizontal tab character. */
    static final char TAB = '\t';

    /** Undefined state for the lookahead char */
    static final int UNDEFINED = -2;

    /** ASCII unit separator */
    static final char US = 31;

    /** No instances: this class is a pure constant holder. */
    private Constants() {
        // noop
    }
}

View File

@ -1,191 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import static org.apache.commons.csv.Constants.CR;
import static org.apache.commons.csv.Constants.END_OF_STREAM;
import static org.apache.commons.csv.Constants.LF;
import static org.apache.commons.csv.Constants.UNDEFINED;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
/**
 * A special buffered reader which supports sophisticated read access.
 * <p>
 * In particular the reader supports a look-ahead option, which allows you to see the next char returned by
 * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
 * </p>
 */
final class ExtendedBufferedReader extends BufferedReader {

    /** The last char returned */
    private int lastChar = UNDEFINED;

    /** The count of EOLs (CR/LF/CRLF) seen so far */
    private long eolCounter;

    /** The position, which is number of characters read so far */
    private long position;

    /** Set to true once {@link #close()} has been called. */
    private boolean closed;

    /**
     * Creates an extended buffered reader using the default buffer size.
     *
     * @param reader
     *            the reader to wrap
     */
    ExtendedBufferedReader(final Reader reader) {
        super(reader);
    }

    /**
     * Closes the stream.
     *
     * @throws IOException
     *             If an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        // Set ivars before calling super close() in case close() throws an IOException.
        closed = true;
        lastChar = END_OF_STREAM;
        super.close();
    }

    /**
     * Returns the current line number
     *
     * @return the current line number
     */
    long getCurrentLineNumber() {
        // Check if we are at EOL or EOF or just starting
        if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
            return eolCounter; // counter is accurate
        }
        return eolCounter + 1; // Allow for counter being incremented only at EOL
    }

    /**
     * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
     * any of the read methods. This will not include a character read using the {@link #lookAhead()} method. If no
     * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached
     * on the last read then this will return {@link Constants#END_OF_STREAM}.
     *
     * @return the last character that was read
     */
    int getLastChar() {
        return lastChar;
    }

    /**
     * Gets the character position in the reader.
     *
     * @return the current position in the reader (counting characters, not bytes since this is a Reader)
     */
    long getPosition() {
        return this.position;
    }

    /**
     * Tells whether {@link #close()} has been called on this reader.
     *
     * @return true if this reader has been closed
     */
    public boolean isClosed() {
        return closed;
    }

    /**
     * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
     * still return this value. Does not affect line number or last character.
     *
     * @return the next character
     *
     * @throws IOException
     *             if there is an error in reading
     */
    int lookAhead() throws IOException {
        // Go through super so that neither lastChar, eolCounter nor position are touched.
        super.mark(1);
        final int c = super.read();
        super.reset();
        return c;
    }

    /**
     * Reads a single character, updating the EOL counter, the last character and the position.
     *
     * @return the character read, or {@link Constants#END_OF_STREAM} at the end of the stream
     * @throws IOException
     *             if there is an error in reading
     */
    @Override
    public int read() throws IOException {
        final int current = super.read();
        // A LF directly after a CR belongs to the same CRLF terminator and is not counted again.
        if (current == CR || current == LF && lastChar != CR) {
            eolCounter++;
        }
        lastChar = current;
        this.position++;
        return lastChar;
    }

    /**
     * Reads characters into a portion of an array, updating the EOL counter, the last character and the position.
     *
     * @param buf
     *            the destination buffer
     * @param offset
     *            the index in {@code buf} at which to start storing characters
     * @param length
     *            the maximum number of characters to read
     * @return the number of characters read, or -1 at the end of the stream
     * @throws IOException
     *             if there is an error in reading
     */
    @Override
    public int read(final char[] buf, final int offset, final int length) throws IOException {
        if (length == 0) {
            return 0;
        }
        final int len = super.read(buf, offset, length);
        if (len > 0) {
            for (int i = offset; i < offset + len; i++) {
                final char ch = buf[i];
                if (ch == LF) {
                    // The char before the first one read here is lastChar, not whatever the caller's
                    // array happens to hold at buf[offset - 1]; only look into buf past the offset.
                    if (CR != (i > offset ? buf[i - 1] : lastChar)) {
                        eolCounter++;
                    }
                } else if (ch == CR) {
                    eolCounter++;
                }
            }
            lastChar = buf[offset + len - 1];
            // Only advance by characters actually read; never subtract the -1 EOF marker.
            position += len;
        } else if (len == -1) {
            lastChar = END_OF_STREAM;
        }
        return len;
    }

    /**
     * Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
     * when processing a comment, otherwise information can be lost.
     * <p>
     * Increments {@link #eolCounter}
     * <p>
     * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF
     * <p>
     * NOTE(review): this method does not adjust {@link #position} itself.
     *
     * @return the line that was read, or null if reached EOF.
     * @throws IOException
     *             if there is an error in reading
     */
    @Override
    public String readLine() throws IOException {
        final String line = super.readLine();
        if (line != null) {
            lastChar = LF; // needed for detecting start of line
            eolCounter++;
        } else {
            lastChar = END_OF_STREAM;
        }
        return line;
    }
}

View File

@ -1,139 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.CharBuffer;
/**
 * Reader copying helpers, copied from Apache Commons IO.
 */
final class IOUtils {

    /**
     * <p>
     * Copied from Apache Commons IO.
     * </p>
     * The default buffer size ({@value}).
     */
    static final int DEFAULT_BUFFER_SIZE = 1024 * 4;

    /**
     * <p>
     * Copied from Apache Commons IO.
     * </p>
     * Represents the end-of-file (or stream).
     */
    private static final int EOF = -1;

    /** No instances: this class only has static helpers. */
    private IOUtils() {
        // noop
    }

    /**
     * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}.
     * <p>
     * This method buffers the input internally, so there is no need to use a
     * {@code BufferedReader}.
     * </p>
     * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}.
     *
     * @param input the {@code Reader} to read from
     * @param output the {@code Appendable} to append to
     * @return the number of characters copied
     * @throws NullPointerException if the input or output is null
     * @throws IOException if an I/O error occurs
     */
    static long copy(final Reader input, final Appendable output) throws IOException {
        return copy(input, output, CharBuffer.allocate(DEFAULT_BUFFER_SIZE));
    }

    /**
     * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}.
     * <p>
     * This method uses the provided buffer, so there is no need to use a
     * {@code BufferedReader}. The buffer is expected to be empty (position 0) when passed in; it is
     * cleared after every chunk so that each read can use its whole capacity.
     * </p>
     *
     * @param input the {@code Reader} to read from
     * @param output the {@code Appendable} to write to
     * @param buffer the buffer to be used for the copy
     * @return the number of characters copied
     * @throws NullPointerException if the input or output is null
     * @throws IOException if an I/O error occurs
     */
    static long copy(final Reader input, final Appendable output, final CharBuffer buffer) throws IOException {
        long count = 0;
        int n;
        while (EOF != (n = input.read(buffer))) {
            // Switch the buffer from filling to draining: limit = chars just read, position = 0.
            buffer.flip();
            output.append(buffer, 0, n);
            // Reset position and limit; without this the limit stays at n and every later read is
            // capped at the size of the smallest chunk seen so far.
            buffer.clear();
            count += n;
        }
        return count;
    }

    /**
     * <p>
     * Copied from Apache Commons IO.
     * </p>
     * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}.
     * <p>
     * This method buffers the input internally, so there is no need to use a
     * {@code BufferedReader}.
     * </p>
     * <p>
     * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}.
     * </p>
     *
     * @param input the {@code Reader} to read from
     * @param output the {@code Writer} to write to
     * @return the number of characters copied
     * @throws NullPointerException if the input or output is null
     * @throws IOException if an I/O error occurs
     */
    static long copyLarge(final Reader input, final Writer output) throws IOException {
        return copyLarge(input, output, new char[DEFAULT_BUFFER_SIZE]);
    }

    /**
     * <p>
     * Copied from Apache Commons IO.
     * </p>
     * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}.
     * <p>
     * This method uses the provided buffer, so there is no need to use a
     * {@code BufferedReader}.
     * </p>
     *
     * @param input the {@code Reader} to read from
     * @param output the {@code Writer} to write to
     * @param buffer the buffer to be used for the copy
     * @return the number of characters copied
     * @throws NullPointerException if the input or output is null
     * @throws IOException if an I/O error occurs
     */
    static long copyLarge(final Reader input, final Writer output, final char[] buffer) throws IOException {
        long count = 0;
        int n;
        while (EOF != (n = input.read(buffer))) {
            output.write(buffer, 0, n);
            count += n;
        }
        return count;
    }
}

View File

@ -1,461 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import static org.apache.commons.csv.Constants.BACKSPACE;
import static org.apache.commons.csv.Constants.CR;
import static org.apache.commons.csv.Constants.END_OF_STREAM;
import static org.apache.commons.csv.Constants.FF;
import static org.apache.commons.csv.Constants.LF;
import static org.apache.commons.csv.Constants.TAB;
import static org.apache.commons.csv.Constants.UNDEFINED;
import static org.apache.commons.csv.Token.Type.COMMENT;
import static org.apache.commons.csv.Token.Type.EOF;
import static org.apache.commons.csv.Token.Type.EORECORD;
import static org.apache.commons.csv.Token.Type.INVALID;
import static org.apache.commons.csv.Token.Type.TOKEN;
import java.io.Closeable;
import java.io.IOException;
/**
* Lexical analyzer.
*/
final class Lexer implements Closeable {
private static final String CR_STRING = Character.toString(CR);
private static final String LF_STRING = Character.toString(LF);
/**
* Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
* chars (using surrogates) and thus there should never be a collision with a real text char.
*/
private static final char DISABLED = '\ufffe';
private final char delimiter;
private final char escape;
private final char quoteChar;
private final char commentStart;
private final boolean ignoreSurroundingSpaces;
private final boolean ignoreEmptyLines;
/** The input stream */
private final ExtendedBufferedReader reader;
private String firstEol;
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
this.reader = reader;
this.delimiter = format.getDelimiter();
this.escape = mapNullToDisabled(format.getEscapeCharacter());
this.quoteChar = mapNullToDisabled(format.getQuoteCharacter());
this.commentStart = mapNullToDisabled(format.getCommentMarker());
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
}
/**
* Closes resources.
*
* @throws IOException
* If an I/O error occurs
*/
@Override
public void close() throws IOException {
reader.close();
}
/**
* Returns the current character position
*
* @return the current character position
*/
long getCharacterPosition() {
return reader.getPosition();
}
/**
* Returns the current line number
*
* @return the current line number
*/
long getCurrentLineNumber() {
return reader.getCurrentLineNumber();
}
String getFirstEol(){
return firstEol;
}
boolean isClosed() {
return reader.isClosed();
}
boolean isCommentStart(final int ch) {
return ch == commentStart;
}
boolean isDelimiter(final int ch) {
return ch == delimiter;
}
/**
* @return true if the given character indicates end of file
*/
boolean isEndOfFile(final int ch) {
return ch == END_OF_STREAM;
}
boolean isEscape(final int ch) {
return ch == escape;
}
private boolean isMetaChar(final int ch) {
return ch == delimiter ||
ch == escape ||
ch == quoteChar ||
ch == commentStart;
}
boolean isQuoteChar(final int ch) {
return ch == quoteChar;
}
/**
* Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
*
* @param ch the character to check
* @return true if the character is at the start of a line.
*/
boolean isStartOfLine(final int ch) {
return ch == LF || ch == CR || ch == UNDEFINED;
}
/**
* @return true if the given char is a whitespace character
*/
boolean isWhitespace(final int ch) {
return !isDelimiter(ch) && Character.isWhitespace((char) ch);
}
private char mapNullToDisabled(final Character c) {
return c == null ? DISABLED : c.charValue();
}
/**
* Returns the next token.
* <p>
* A token corresponds to a term, a record change or an end-of-file indicator.
* </p>
*
* @param token
* an existing Token object to reuse. The caller is responsible to initialize the Token.
* @return the next token found
* @throws java.io.IOException
* on stream access error
*/
Token nextToken(final Token token) throws IOException {
// get the last read char (required for empty line detection)
int lastChar = reader.getLastChar();
// read the next char and set eol
int c = reader.read();
/*
* Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF
* - they are equivalent here.
*/
boolean eol = readEndOfLine(c);
// empty line detection: eol AND (last char was EOL or beginning)
if (ignoreEmptyLines) {
while (eol && isStartOfLine(lastChar)) {
// go on char ahead ...
lastChar = c;
c = reader.read();
eol = readEndOfLine(c);
// reached end of file without any content (empty line at the end)
if (isEndOfFile(c)) {
token.type = EOF;
// don't set token.isReady here because no content
return token;
}
}
}
// did we reach eof during the last iteration already ? EOF
if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) {
token.type = EOF;
// don't set token.isReady here because no content
return token;
}
if (isStartOfLine(lastChar) && isCommentStart(c)) {
final String line = reader.readLine();
if (line == null) {
token.type = EOF;
// don't set token.isReady here because no content
return token;
}
final String comment = line.trim();
token.content.append(comment);
token.type = COMMENT;
return token;
}
// important: make sure a new char gets consumed in each iteration
while (token.type == INVALID) {
// ignore whitespaces at beginning of a token
if (ignoreSurroundingSpaces) {
while (isWhitespace(c) && !eol) {
c = reader.read();
eol = readEndOfLine(c);
}
}
// ok, start of token reached: encapsulated, or token
if (isDelimiter(c)) {
// empty token return TOKEN("")
token.type = TOKEN;
} else if (eol) {
// empty token return EORECORD("")
// noop: token.content.append("");
token.type = EORECORD;
} else if (isQuoteChar(c)) {
// consume encapsulated token
parseEncapsulatedToken(token);
} else if (isEndOfFile(c)) {
// end of file return EOF()
// noop: token.content.append("");
token.type = EOF;
token.isReady = true; // there is data at EOF
} else {
// next token must be a simple token
// add removed blanks when not ignoring whitespace chars...
parseSimpleToken(token, c);
}
}
return token;
}
/**
* Parses an encapsulated token.
* <p/>
* Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included
* in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
* an encapsulated token are ignored. The token is finished when one of the following conditions become true:
* <ul>
* <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:</li>
* <ul>
* <li>delimiter (TOKEN)</li>
* <li>end of line (EORECORD)</li>
* </ul>
* <li>end of stream has been reached (EOF)</li> </ul>
*
* @param token
* the current token
* @return a valid token object
* @throws IOException
* on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL
*/
private Token parseEncapsulatedToken(final Token token) throws IOException {
// save current line number in case needed for IOE
final long startLineNumber = getCurrentLineNumber();
int c;
while (true) {
c = reader.read();
if (isEscape(c)) {
final int unescaped = readEscape();
if (unescaped == END_OF_STREAM) { // unexpected char after escape
token.content.append((char) c).append((char) reader.getLastChar());
} else {
token.content.append((char) unescaped);
}
} else if (isQuoteChar(c)) {
if (isQuoteChar(reader.lookAhead())) {
// double or escaped encapsulator -> add single encapsulator to token
c = reader.read();
token.content.append((char) c);
} else {
// token finish mark (encapsulator) reached: ignore whitespace till delimiter
while (true) {
c = reader.read();
if (isDelimiter(c)) {
token.type = TOKEN;
return token;
} else if (isEndOfFile(c)) {
token.type = EOF;
token.isReady = true; // There is data at EOF
return token;
} else if (readEndOfLine(c)) {
token.type = EORECORD;
return token;
} else if (!isWhitespace(c)) {
// error invalid char between token and next delimiter
throw new IOException("(line " + getCurrentLineNumber() +
") invalid char between encapsulated token and delimiter");
}
}
}
} else if (isEndOfFile(c)) {
// error condition (end of file before end of token)
throw new IOException("(startline " + startLineNumber +
") EOF reached before encapsulated token finished");
} else {
// consume character
token.content.append((char) c);
}
}
}
/**
* Parses a simple token.
* <p/>
* Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped
* delimiters (as \, or \;). The token is finished when one of the following conditions become true:
* <ul>
* <li>end of line has been reached (EORECORD)</li>
* <li>end of stream has been reached (EOF)</li>
* <li>an unescaped delimiter has been reached (TOKEN)</li>
* </ul>
*
* @param token
* the current token
* @param ch
* the current character
* @return the filled token
* @throws IOException
* on stream access error
*/
private Token parseSimpleToken(final Token token, int ch) throws IOException {
// Faster to use while(true)+break than while(token.type == INVALID)
while (true) {
if (readEndOfLine(ch)) {
token.type = EORECORD;
break;
} else if (isEndOfFile(ch)) {
token.type = EOF;
token.isReady = true; // There is data at EOF
break;
} else if (isDelimiter(ch)) {
token.type = TOKEN;
break;
} else if (isEscape(ch)) {
final int unescaped = readEscape();
if (unescaped == END_OF_STREAM) { // unexpected char after escape
token.content.append((char) ch).append((char) reader.getLastChar());
} else {
token.content.append((char) unescaped);
}
ch = reader.read(); // continue
} else {
token.content.append((char) ch);
ch = reader.read(); // continue
}
}
if (ignoreSurroundingSpaces) {
trimTrailingSpaces(token.content);
}
return token;
}
/**
* Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character...
*
* @return true if the given or next character is a line-terminator
*/
boolean readEndOfLine(int ch) throws IOException {
// check if we have \r\n...
if (ch == CR && reader.lookAhead() == LF) {
// note: does not change ch outside of this method!
ch = reader.read();
// Save the EOL state
if (firstEol == null) {
this.firstEol = Constants.CRLF;
}
}
// save EOL state here.
if (firstEol == null) {
if (ch == LF) {
this.firstEol = LF_STRING;
} else if (ch == CR) {
this.firstEol = CR_STRING;
}
}
return ch == LF || ch == CR;
}
// TODO escape handling needs more work
/**
 * Decodes an escape sequence.
 * The escape character itself must already have been read; this method reads the character after it.
 * On return, that character is available by calling {@link ExtendedBufferedReader#getLastChar()}
 * on the input stream.
 *
 * @return the unescaped character (as an int) or {@link Constants#END_OF_STREAM} if the character following
 *         the escape is invalid.
 * @throws IOException if there is a problem reading the stream or the end of stream is detected:
 *         the escape character is not allowed at end of stream
 */
int readEscape() throws IOException {
    // The escape character (normally a backslash) has just been read; fetch its successor.
    final int next = reader.read();
    if (next == END_OF_STREAM) {
        throw new IOException("EOF whilst processing escape sequence");
    }
    // Letter codes map to the control character they stand for.
    if (next == 'r') {
        return CR;
    }
    if (next == 'n') {
        return LF;
    }
    if (next == 't') {
        return TAB;
    }
    if (next == 'b') {
        return BACKSPACE;
    }
    if (next == 'f') {
        return FF;
    }
    // Escaped literal control characters are returned unchanged.
    // TODO is this correct for FF, TAB and BACKSPACE? Do tabs need to be escaped?
    if (next == CR || next == LF || next == FF || next == TAB || next == BACKSPACE) {
        return next;
    }
    // Meta-characters are returned unchanged; anything else is reported as unexpected via
    // END_OF_STREAM, and the offending character is available from reader.getLastChar().
    return isMetaChar(next) ? next : END_OF_STREAM;
}
/**
 * Removes trailing whitespace, as defined by {@link Character#isWhitespace(char)}, from the buffer in place.
 *
 * @param buffer the buffer to trim; it is only modified when it ends in whitespace
 */
void trimTrailingSpaces(final StringBuilder buffer) {
    final int originalLength = buffer.length();
    int end = originalLength;
    // Walk backwards until the character just before 'end' is not whitespace.
    for (; end > 0; end--) {
        if (!Character.isWhitespace(buffer.charAt(end - 1))) {
            break;
        }
    }
    if (end != originalLength) {
        buffer.setLength(end);
    }
}
}

View File

@ -1,50 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
/**
* Defines quoting behavior when printing.
*/
public enum QuoteMode {
/**
* Quotes all fields.
*/
ALL,
/**
* Quotes all non-null fields.
*/
ALL_NON_NULL,
/**
* Quotes fields which contain special characters such as the field delimiter, the quote character or any of the
* characters in the line separator string.
*/
MINIMAL,
/**
* Quotes all non-numeric fields.
*/
NON_NUMERIC,
/**
* Never quotes fields. When the delimiter occurs in data, the printer prefixes it with the escape character. If the
* escape character is not set, format validation throws an exception.
*/
NONE
}

View File

@ -1,73 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import static org.apache.commons.csv.Token.Type.INVALID;
/**
 * Internal token representation.
 * <p>
 * It is used as contract between the lexer and the parser.
 * </p>
 */
final class Token {

    /** The kinds of token the lexer can hand to the parser. */
    enum Type {
        /** Token has no valid content, i.e. is in its initialized state. */
        INVALID,

        /** Token with content, at beginning or in the middle of a line. */
        TOKEN,

        /** Token (which can have content) when the end of file is reached. */
        EOF,

        /** Token with content when the end of a line is reached. */
        EORECORD,

        /** Token is a comment line. */
        COMMENT
    }

    /** Initial capacity of the token content buffer. */
    private static final int INITIAL_TOKEN_LENGTH = 50;

    /** The content buffer. */
    final StringBuilder content = new StringBuilder(INITIAL_TOKEN_LENGTH);

    /** Token type; {@code INVALID} until a type is assigned. */
    Token.Type type = INVALID;

    /** Token ready flag: indicates a valid token with content (ready for the parser). */
    boolean isReady;

    /**
     * Returns this token to its initial state: not ready, type {@code INVALID}, empty content.
     */
    void reset() {
        isReady = false;
        type = INVALID;
        content.setLength(0);
    }

    /**
     * Eases IDE debugging.
     *
     * @return the type name followed by the content in square brackets.
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder(type.name());
        text.append(" [").append(content).append("]");
        return text.toString();
    }
}

View File

@ -1,82 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Apache Commons CSV Format Support.
*
* <p>CSV are widely used as interfaces to legacy systems or manual data-imports.
* CSV stands for "Comma Separated Values" (or sometimes "Character Separated
* Values"). The CSV data format is defined in
* <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a>
* but many dialects exist.</p>
*
* <p>Common to all file dialects is their basic structure: the CSV data format
* is record oriented, and each record starts on a new textual line. A
* record is built from a list of values. Keep in mind that not all records
* must have an equal number of values:</p>
* <pre>
* csv := records*
* record := values*
* </pre>
*
* <p>The following list contains the CSV aspects the Commons CSV parser supports:</p>
* <dl>
* <dt>Separators (for lines)</dt>
* <dd>The record separators are hardcoded and cannot be changed. They must be '\r', '\n' or '\r\n'.</dd>
*
* <dt>Delimiter (for values)</dt>
* <dd>The delimiter for values is freely configurable (default ',').</dd>
*
* <dt>Comments</dt>
* <dd>Some CSV-dialects support a simple comment syntax. A comment is a record
* which must start with a designated character (the commentStarter). A record
* of this kind is treated as a comment and gets removed from the input (default none).</dd>
*
* <dt>Encapsulator</dt>
* <dd>Two encapsulator characters (default '"') are used to enclose -&gt; complex values.</dd>
*
* <dt>Simple values</dt>
* <dd>A simple value consists of all characters (except the delimiter) until
* (but not including) the next delimiter or a record-terminator. Optionally
* all surrounding whitespace of a simple value can be ignored (default: true).</dd>
*
* <dt>Complex values</dt>
* <dd>Complex values are encapsulated within a pair of the defined encapsulator characters.
* The encapsulator itself must be escaped or doubled when used inside complex values.
* Complex values preserve all kinds of formatting (including newlines -&gt; multiline-values).</dd>
*
* <dt>Empty line skipping</dt>
* <dd>Optionally empty lines in CSV files can be skipped.
* Otherwise, empty lines will return a record with a single empty value.</dd>
* </dl>
*
* <p>In addition to individually defined dialects, two predefined dialects (strict-csv, and excel-csv)
* can be set directly.</p> <!-- TODO fix -->
*
* <p>Example usage:</p>
* <blockquote><pre>
* Reader in = new StringReader("a,b,c");
* for (CSVRecord record : CSVFormat.DEFAULT.parse(in)) {
* for (String field : record) {
* System.out.print("\"" + field + "\", ");
* }
* System.out.println();
* }
* </pre></blockquote>
*/
package org.apache.commons.csv;