Remove odd cruft.
This commit is contained in:
parent
29459edcdf
commit
91db4ff0b8
File diff suppressed because it is too large
Load Diff
|
@ -1,715 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import static org.apache.commons.csv.Token.Type.TOKEN;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Objects;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Parses CSV files according to the specified format.
|
||||
*
|
||||
* Because CSV appears in many different dialects, the parser supports many formats by allowing the
|
||||
* specification of a {@link CSVFormat}.
|
||||
*
|
||||
* The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream.
|
||||
*
|
||||
* <h2>Creating instances</h2>
|
||||
* <p>
|
||||
* There are several static factory methods that can be used to create instances for various types of resources:
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
|
||||
* <li>{@link #parse(String, CSVFormat)}</li>
|
||||
* <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Alternatively parsers can also be created by passing a {@link Reader} directly to the sole constructor.
|
||||
*
|
||||
* For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
|
||||
* </p>
|
||||
* <pre>
|
||||
* for(CSVRecord record : CSVFormat.EXCEL.parse(in)) {
|
||||
* ...
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* <h2>Parsing record wise</h2>
|
||||
* <p>
|
||||
* To parse a CSV input from a file, you write:
|
||||
* </p>
|
||||
*
|
||||
* <pre>
|
||||
* File csvData = new File("/path/to/csv");
|
||||
* CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.RFC4180);
|
||||
* for (CSVRecord csvRecord : parser) {
|
||||
* ...
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* This will read and parse the contents of the file using the
|
||||
* <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* To parse CSV input in a format like Excel, you write:
|
||||
* </p>
|
||||
*
|
||||
* <pre>
|
||||
* CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.EXCEL);
|
||||
* for (CSVRecord csvRecord : parser) {
|
||||
* ...
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* If the predefined formats don't match the format at hand, custom formats can be defined. More information about
|
||||
* customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Parsing into memory</h2>
|
||||
* <p>
|
||||
* If parsing record wise is not desired, the contents of the input can be read completely into memory.
|
||||
* </p>
|
||||
*
|
||||
* <pre>
|
||||
* Reader in = new StringReader("a;b\nc;d");
|
||||
* CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
|
||||
* List<CSVRecord> list = parser.getRecords();
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* There are two constraints that have to be kept in mind:
|
||||
* </p>
|
||||
*
|
||||
* <ol>
|
||||
* <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
|
||||
* the input, those records will not end up in the in memory representation of your CSV data.</li>
|
||||
* <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're
|
||||
* parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>Notes</h2>
|
||||
* <p>
|
||||
* Internal parser state is completely covered by the format and the reader-state.
|
||||
* </p>
|
||||
*
|
||||
* @see <a href="package-summary.html">package documentation for more details</a>
|
||||
*/
|
||||
public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
||||
|
||||
class CSVRecordIterator implements Iterator<CSVRecord> {
|
||||
private CSVRecord current;
|
||||
|
||||
private CSVRecord getNextRecord() {
|
||||
try {
|
||||
return CSVParser.this.nextRecord();
|
||||
} catch (final IOException e) {
|
||||
throw new IllegalStateException(
|
||||
e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
if (CSVParser.this.isClosed()) {
|
||||
return false;
|
||||
}
|
||||
if (this.current == null) {
|
||||
this.current = this.getNextRecord();
|
||||
}
|
||||
|
||||
return this.current != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CSVRecord next() {
|
||||
if (CSVParser.this.isClosed()) {
|
||||
throw new NoSuchElementException("CSVParser has been closed");
|
||||
}
|
||||
CSVRecord next = this.current;
|
||||
this.current = null;
|
||||
|
||||
if (next == null) {
|
||||
// hasNext() wasn't called before
|
||||
next = this.getNextRecord();
|
||||
if (next == null) {
|
||||
throw new NoSuchElementException("No more CSV records available");
|
||||
}
|
||||
}
|
||||
|
||||
return next;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Header information based on name and position.
|
||||
*/
|
||||
private static final class Headers {
|
||||
/**
|
||||
* Header column positions (0-based)
|
||||
*/
|
||||
final Map<String, Integer> headerMap;
|
||||
|
||||
/**
|
||||
* Header names in column order
|
||||
*/
|
||||
final List<String> headerNames;
|
||||
|
||||
Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
|
||||
this.headerMap = headerMap;
|
||||
this.headerNames = headerNames;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a parser for the given {@link File}.
|
||||
*
|
||||
* @param file
|
||||
* a CSV file. Must not be null.
|
||||
* @param charset
|
||||
* The Charset to decode the given file.
|
||||
* @param format
|
||||
* the CSVFormat used for CSV parsing. Must not be null.
|
||||
* @return a new parser
|
||||
* @throws IllegalArgumentException
|
||||
* If the parameters of the format are inconsistent or if either file or format are null.
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
@SuppressWarnings("resource")
|
||||
public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
|
||||
Objects.requireNonNull(file, "file");
|
||||
Objects.requireNonNull(format, "format");
|
||||
return new CSVParser(new InputStreamReader(new FileInputStream(file), charset), format);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a CSV parser using the given {@link CSVFormat}.
|
||||
*
|
||||
* <p>
|
||||
* If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
|
||||
* unless you close the {@code reader}.
|
||||
* </p>
|
||||
*
|
||||
* @param inputStream
|
||||
* an InputStream containing CSV-formatted input. Must not be null.
|
||||
* @param charset
|
||||
* The Charset to decode the given file.
|
||||
* @param format
|
||||
* the CSVFormat used for CSV parsing. Must not be null.
|
||||
* @return a new CSVParser configured with the given reader and format.
|
||||
* @throws IllegalArgumentException
|
||||
* If the parameters of the format are inconsistent or if either reader or format are null.
|
||||
* @throws IOException
|
||||
* If there is a problem reading the header or skipping the first record
|
||||
* @since 1.5
|
||||
*/
|
||||
@SuppressWarnings("resource")
|
||||
public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
|
||||
throws IOException {
|
||||
Objects.requireNonNull(inputStream, "inputStream");
|
||||
Objects.requireNonNull(format, "format");
|
||||
return parse(new InputStreamReader(inputStream, charset), format);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates and returns a parser for the given {@link Path}, which the caller MUST close.
|
||||
*
|
||||
* @param path
|
||||
* a CSV file. Must not be null.
|
||||
* @param charset
|
||||
* The Charset to decode the given file.
|
||||
* @param format
|
||||
* the CSVFormat used for CSV parsing. Must not be null.
|
||||
* @return a new parser
|
||||
* @throws IllegalArgumentException
|
||||
* If the parameters of the format are inconsistent or if either file or format are null.
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
* @since 1.5
|
||||
*/
|
||||
@SuppressWarnings("resource")
|
||||
public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
|
||||
Objects.requireNonNull(path, "path");
|
||||
Objects.requireNonNull(format, "format");
|
||||
return parse(Files.newInputStream(path), charset, format);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a CSV parser using the given {@link CSVFormat}
|
||||
*
|
||||
* <p>
|
||||
* If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
|
||||
* unless you close the {@code reader}.
|
||||
* </p>
|
||||
*
|
||||
* @param reader
|
||||
* a Reader containing CSV-formatted input. Must not be null.
|
||||
* @param format
|
||||
* the CSVFormat used for CSV parsing. Must not be null.
|
||||
* @return a new CSVParser configured with the given reader and format.
|
||||
* @throws IllegalArgumentException
|
||||
* If the parameters of the format are inconsistent or if either reader or format are null.
|
||||
* @throws IOException
|
||||
* If there is a problem reading the header or skipping the first record
|
||||
* @since 1.5
|
||||
*/
|
||||
public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
|
||||
return new CSVParser(reader, format);
|
||||
}
|
||||
|
||||
// the following objects are shared to reduce garbage
|
||||
|
||||
/**
|
||||
* Creates a parser for the given {@link String}.
|
||||
*
|
||||
* @param string
|
||||
* a CSV string. Must not be null.
|
||||
* @param format
|
||||
* the CSVFormat used for CSV parsing. Must not be null.
|
||||
* @return a new parser
|
||||
* @throws IllegalArgumentException
|
||||
* If the parameters of the format are inconsistent or if either string or format are null.
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
|
||||
Objects.requireNonNull(string, "string");
|
||||
Objects.requireNonNull(format, "format");
|
||||
|
||||
return new CSVParser(new StringReader(string), format);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates and returns a parser for the given URL, which the caller MUST close.
|
||||
*
|
||||
* <p>
|
||||
* If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless
|
||||
* you close the {@code url}.
|
||||
* </p>
|
||||
*
|
||||
* @param url
|
||||
* a URL. Must not be null.
|
||||
* @param charset
|
||||
* the charset for the resource. Must not be null.
|
||||
* @param format
|
||||
* the CSVFormat used for CSV parsing. Must not be null.
|
||||
* @return a new parser
|
||||
* @throws IllegalArgumentException
|
||||
* If the parameters of the format are inconsistent or if either url, charset or format are null.
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
@SuppressWarnings("resource")
|
||||
public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
|
||||
Objects.requireNonNull(url, "url");
|
||||
Objects.requireNonNull(charset, "charset");
|
||||
Objects.requireNonNull(format, "format");
|
||||
|
||||
return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
|
||||
}
|
||||
|
||||
private final CSVFormat format;
|
||||
|
||||
/** A mapping of column names to column indices */
|
||||
private final Map<String, Integer> headerMap;
|
||||
|
||||
/** The column order to avoid re-computing it. */
|
||||
private final List<String> headerNames;
|
||||
|
||||
private final Lexer lexer;
|
||||
|
||||
private final CSVRecordIterator csvRecordIterator;
|
||||
|
||||
/** A record buffer for getRecord(). Grows as necessary and is reused. */
|
||||
private final List<String> recordList = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* The next record number to assign.
|
||||
*/
|
||||
private long recordNumber;
|
||||
|
||||
/**
|
||||
* Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
|
||||
* with {@link #recordNumber}.
|
||||
*/
|
||||
private final long characterOffset;
|
||||
|
||||
private final Token reusableToken = new Token();
|
||||
|
||||
/**
 * Creates a CSV parser for the given reader and {@link CSVFormat}, starting at character offset 0 with record
 * number 1.
 *
 * <p>
 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
 * unless you close the {@code reader}.
 * </p>
 *
 * @param reader
 *            a Reader containing CSV-formatted input. Must not be null.
 * @param format
 *            the CSVFormat used for CSV parsing. Must not be null.
 * @throws NullPointerException
 *             If either reader or format is null.
 * @throws IllegalArgumentException
 *             If the parameters of the format are inconsistent.
 * @throws IOException
 *             If there is a problem reading the header or skipping the first record
 */
public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
    this(reader, format, 0L, 1L);
}
|
||||
|
||||
/**
 * Creates a CSV parser for the given reader and {@link CSVFormat}, with an explicit starting offset and record
 * number.
 *
 * <p>
 * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
 * unless you close the {@code reader}.
 * </p>
 *
 * @param reader
 *            a Reader containing CSV-formatted input. Must not be null.
 * @param format
 *            the CSVFormat used for CSV parsing. Must not be null.
 * @param characterOffset
 *            Lexer offset when the parser does not start parsing at the beginning of the source.
 * @param recordNumber
 *            The next record number to assign
 * @throws NullPointerException
 *             If either reader or format is null.
 * @throws IllegalArgumentException
 *             If the parameters of the format are inconsistent.
 * @throws IOException
 *             If there is a problem reading the header or skipping the first record
 * @since 1.1
 */
@SuppressWarnings("resource")
public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
        throws IOException {
    Objects.requireNonNull(reader, "reader");
    Objects.requireNonNull(format, "format");

    this.format = format;
    final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader);
    this.lexer = new Lexer(format, bufferedReader);
    this.csvRecordIterator = new CSVRecordIterator();

    // createHeaders() may consume a record through nextRecord(), so it needs format and lexer to be set first.
    final Headers parsedHeaders = createHeaders();
    this.headerMap = parsedHeaders.headerMap;
    this.headerNames = parsedHeaders.headerNames;

    // Assigned only after the header has been read, so a record consumed for the header does not affect
    // the numbering: nextRecord() pre-increments, hence the "- 1".
    this.characterOffset = characterOffset;
    this.recordNumber = recordNumber - 1;
}
|
||||
|
||||
private void addRecordValue(final boolean lastRecord) {
|
||||
final String input = this.reusableToken.content.toString();
|
||||
final String inputClean = this.format.getTrim() ? input.trim() : input;
|
||||
if (lastRecord && inputClean.isEmpty() && this.format.getTrailingDelimiter()) {
|
||||
return;
|
||||
}
|
||||
final String nullString = this.format.getNullString();
|
||||
this.recordList.add(inputClean.equals(nullString) ? null : inputClean);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes resources.
|
||||
*
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (this.lexer != null) {
|
||||
this.lexer.close();
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Integer> createEmptyHeaderMap() {
|
||||
return this.format.getIgnoreHeaderCase() ?
|
||||
new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
|
||||
new LinkedHashMap<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the name to index mapping if the format defines a header.
|
||||
*
|
||||
* @return null if the format has no header.
|
||||
* @throws IOException if there is a problem reading the header or skipping the first record
|
||||
*/
|
||||
private Headers createHeaders() throws IOException {
|
||||
Map<String, Integer> hdrMap = null;
|
||||
List<String> headerNames = null;
|
||||
final String[] formatHeader = this.format.getHeader();
|
||||
if (formatHeader != null) {
|
||||
hdrMap = createEmptyHeaderMap();
|
||||
String[] headerRecord = null;
|
||||
if (formatHeader.length == 0) {
|
||||
// read the header from the first line of the file
|
||||
final CSVRecord nextRecord = this.nextRecord();
|
||||
if (nextRecord != null) {
|
||||
headerRecord = nextRecord.values();
|
||||
}
|
||||
} else {
|
||||
if (this.format.getSkipHeaderRecord()) {
|
||||
this.nextRecord();
|
||||
}
|
||||
headerRecord = formatHeader;
|
||||
}
|
||||
|
||||
// build the name to index mappings
|
||||
if (headerRecord != null) {
|
||||
for (int i = 0; i < headerRecord.length; i++) {
|
||||
final String header = headerRecord[i];
|
||||
final boolean emptyHeader = header == null || header.trim().isEmpty();
|
||||
if (emptyHeader && !this.format.getAllowMissingColumnNames()) {
|
||||
throw new IllegalArgumentException(
|
||||
"A header name is missing in " + Arrays.toString(headerRecord));
|
||||
}
|
||||
// Note: This will always allow a duplicate header if the header is empty
|
||||
final boolean containsHeader = header != null && hdrMap.containsKey(header);
|
||||
if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
|
||||
throw new IllegalArgumentException(
|
||||
String.format(
|
||||
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
|
||||
header, Arrays.toString(headerRecord)));
|
||||
}
|
||||
if (header != null) {
|
||||
hdrMap.put(header, Integer.valueOf(i));
|
||||
if (headerNames == null) {
|
||||
headerNames = new ArrayList<>(headerRecord.length);
|
||||
}
|
||||
headerNames.add(header);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (headerNames == null) {
|
||||
headerNames = Collections.emptyList(); //immutable
|
||||
} else {
|
||||
headerNames = Collections.unmodifiableList(headerNames);
|
||||
}
|
||||
return new Headers(hdrMap, headerNames);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current line number in the input stream.
|
||||
*
|
||||
* <p>
|
||||
* <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
|
||||
* the record number.
|
||||
* </p>
|
||||
*
|
||||
* @return current line number
|
||||
*/
|
||||
public long getCurrentLineNumber() {
|
||||
return this.lexer.getCurrentLineNumber();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the first end-of-line string encountered.
|
||||
*
|
||||
* @return the first end-of-line string
|
||||
* @since 1.5
|
||||
*/
|
||||
public String getFirstEndOfLine() {
|
||||
return lexer.getFirstEol();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of the header map.
|
||||
* <p>
|
||||
* The map keys are column names. The map values are 0-based indices.
|
||||
* </p>
|
||||
* <p>
|
||||
* Note: The map can only provide a one-to-one mapping when the format did not
|
||||
* contain null or duplicate column names.
|
||||
* </p>
|
||||
*
|
||||
* @return a copy of the header map.
|
||||
*/
|
||||
public Map<String, Integer> getHeaderMap() {
|
||||
if (this.headerMap == null) {
|
||||
return null;
|
||||
}
|
||||
final Map<String, Integer> map = createEmptyHeaderMap();
|
||||
map.putAll(this.headerMap);
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the header map.
|
||||
*
|
||||
* @return the header map.
|
||||
*/
|
||||
Map<String, Integer> getHeaderMapRaw() {
|
||||
return this.headerMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a read-only list of header names that iterates in column order.
|
||||
* <p>
|
||||
* Note: The list provides strings that can be used as keys in the header map.
|
||||
* The list will not contain null column names if they were present in the input
|
||||
* format.
|
||||
* </p>
|
||||
*
|
||||
* @return read-only list of header names that iterates in column order.
|
||||
* @see #getHeaderMap()
|
||||
* @since 1.7
|
||||
*/
|
||||
public List<String> getHeaderNames() {
|
||||
return headerNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current record number in the input stream.
|
||||
*
|
||||
* <p>
|
||||
* <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
|
||||
* the line number.
|
||||
* </p>
|
||||
*
|
||||
* @return current record number
|
||||
*/
|
||||
public long getRecordNumber() {
|
||||
return this.recordNumber;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the CSV input according to the given format and returns the content as a list of
|
||||
* {@link CSVRecord CSVRecords}.
|
||||
*
|
||||
* <p>
|
||||
* The returned content starts at the current parse-position in the stream.
|
||||
* </p>
|
||||
*
|
||||
* @return list of {@link CSVRecord CSVRecords}, may be empty
|
||||
* @throws IOException
|
||||
* on parse error or input read-failure
|
||||
*/
|
||||
public List<CSVRecord> getRecords() throws IOException {
|
||||
CSVRecord rec;
|
||||
final List<CSVRecord> records = new ArrayList<>();
|
||||
while ((rec = this.nextRecord()) != null) {
|
||||
records.add(rec);
|
||||
}
|
||||
return records;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets whether this parser is closed.
|
||||
*
|
||||
* @return whether this parser is closed.
|
||||
*/
|
||||
public boolean isClosed() {
|
||||
return this.lexer.isClosed();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator on the records.
|
||||
*
|
||||
* <p>
|
||||
* An {@link IOException} caught during the iteration are re-thrown as an
|
||||
* {@link IllegalStateException}.
|
||||
* </p>
|
||||
* <p>
|
||||
* If the parser is closed a call to {@link Iterator#next()} will throw a
|
||||
* {@link NoSuchElementException}.
|
||||
* </p>
|
||||
*/
|
||||
@Override
|
||||
public Iterator<CSVRecord> iterator() {
|
||||
return csvRecordIterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the next record from the current point in the stream.
|
||||
*
|
||||
* @return the record as an array of values, or {@code null} if the end of the stream has been reached
|
||||
* @throws IOException
|
||||
* on parse error or input read-failure
|
||||
*/
|
||||
CSVRecord nextRecord() throws IOException {
|
||||
CSVRecord result = null;
|
||||
this.recordList.clear();
|
||||
StringBuilder sb = null;
|
||||
final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset;
|
||||
do {
|
||||
this.reusableToken.reset();
|
||||
this.lexer.nextToken(this.reusableToken);
|
||||
switch (this.reusableToken.type) {
|
||||
case TOKEN:
|
||||
this.addRecordValue(false);
|
||||
break;
|
||||
case EORECORD:
|
||||
this.addRecordValue(true);
|
||||
break;
|
||||
case EOF:
|
||||
if (this.reusableToken.isReady) {
|
||||
this.addRecordValue(true);
|
||||
}
|
||||
break;
|
||||
case INVALID:
|
||||
throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence");
|
||||
case COMMENT: // Ignored currently
|
||||
if (sb == null) { // first comment for this record
|
||||
sb = new StringBuilder();
|
||||
} else {
|
||||
sb.append(Constants.LF);
|
||||
}
|
||||
sb.append(this.reusableToken.content);
|
||||
this.reusableToken.type = TOKEN; // Read another token
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unexpected Token type: " + this.reusableToken.type);
|
||||
}
|
||||
} while (this.reusableToken.type == TOKEN);
|
||||
|
||||
if (!this.recordList.isEmpty()) {
|
||||
this.recordNumber++;
|
||||
final String comment = sb == null ? null : sb.toString();
|
||||
result = new CSVRecord(this, this.recordList.toArray(new String[this.recordList.size()]),
|
||||
comment, this.recordNumber, startCharPosition);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,392 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import static org.apache.commons.csv.Constants.CR;
|
||||
import static org.apache.commons.csv.Constants.LF;
|
||||
import static org.apache.commons.csv.Constants.SP;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.Flushable;
|
||||
import java.io.IOException;
|
||||
import java.sql.Clob;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Prints values in a {@link CSVFormat CSV format}.
|
||||
*
|
||||
* <p>Values can be appended to the output by calling the {@link #print(Object)} method.
|
||||
* Values are printed according to {@link String#valueOf(Object)}.
|
||||
* To complete a record the {@link #println()} method has to be called.
|
||||
* Comments can be appended by calling {@link #printComment(String)}.
|
||||
* However a comment will only be written to the output if the {@link CSVFormat} supports comments.
|
||||
* </p>
|
||||
*
|
||||
* <p>The printer also supports appending a complete record at once by calling {@link #printRecord(Object...)}
|
||||
* or {@link #printRecord(Iterable)}.
|
||||
* Furthermore {@link #printRecords(Object...)}, {@link #printRecords(Iterable)} and {@link #printRecords(ResultSet)}
|
||||
* methods can be used to print several records at once.
|
||||
* </p>
|
||||
*
|
||||
* <p>Example:</p>
|
||||
*
|
||||
* <pre>
|
||||
* try (CSVPrinter printer = new CSVPrinter(new FileWriter("csv.txt"), CSVFormat.EXCEL)) {
|
||||
* printer.printRecord("id", "userName", "firstName", "lastName", "birthday");
|
||||
* printer.printRecord(1, "john73", "John", "Doe", LocalDate.of(1973, 9, 15));
|
||||
* printer.println();
|
||||
* printer.printRecord(2, "mary", "Mary", "Meyer", LocalDate.of(1985, 3, 29));
|
||||
* } catch (IOException ex) {
|
||||
* ex.printStackTrace();
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* <p>This code will write the following to csv.txt:</p>
|
||||
* <pre>
|
||||
* id,userName,firstName,lastName,birthday
|
||||
* 1,john73,John,Doe,1973-09-15
|
||||
*
|
||||
* 2,mary,Mary,Meyer,1985-03-29
|
||||
* </pre>
|
||||
*/
|
||||
public final class CSVPrinter implements Flushable, Closeable {
|
||||
|
||||
/** The place that the values get written. */
|
||||
private final Appendable out;
|
||||
private final CSVFormat format;
|
||||
|
||||
/** True if we just began a new record. */
|
||||
private boolean newRecord = true;
|
||||
|
||||
/**
|
||||
* Creates a printer that will print values to the given stream following the CSVFormat.
|
||||
* <p>
|
||||
* Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation
|
||||
* and escaping with a different character) are not supported.
|
||||
* </p>
|
||||
*
|
||||
* @param out
|
||||
* stream to which to print. Must not be null.
|
||||
* @param format
|
||||
* the CSV format. Must not be null.
|
||||
* @throws IOException
|
||||
* thrown if the optional header cannot be printed.
|
||||
* @throws IllegalArgumentException
|
||||
* thrown if the parameters of the format are inconsistent or if either out or format are null.
|
||||
*/
|
||||
public CSVPrinter(final Appendable out, final CSVFormat format) throws IOException {
|
||||
Objects.requireNonNull(out, "out");
|
||||
Objects.requireNonNull(format, "format");
|
||||
|
||||
this.out = out;
|
||||
this.format = format;
|
||||
// TODO: Is it a good idea to do this here instead of on the first call to a print method?
|
||||
// It seems a pain to have to track whether the header has already been printed or not.
|
||||
if (format.getHeaderComments() != null) {
|
||||
for (final String line : format.getHeaderComments()) {
|
||||
if (line != null) {
|
||||
this.printComment(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (format.getHeader() != null && !format.getSkipHeaderRecord()) {
|
||||
this.printRecord((Object[]) format.getHeader());
|
||||
}
|
||||
}
|
||||
|
||||
// ======================================================
|
||||
// printing implementation
|
||||
// ======================================================
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
close(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the underlying stream with an optional flush first.
|
||||
* @param flush whether to flush before the actual close.
|
||||
*
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
* @since 1.6
|
||||
*/
|
||||
public void close(final boolean flush) throws IOException {
|
||||
if (flush || format.getAutoFlush()) {
|
||||
flush();
|
||||
}
|
||||
if (out instanceof Closeable) {
|
||||
((Closeable) out).close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Flushes the underlying stream.
|
||||
*
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
@Override
|
||||
public void flush() throws IOException {
|
||||
if (out instanceof Flushable) {
|
||||
((Flushable) out).flush();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the target Appendable.
|
||||
*
|
||||
* @return the target Appendable.
|
||||
*/
|
||||
public Appendable getOut() {
|
||||
return this.out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints the string as the next value on the line. The value will be escaped or encapsulated as needed.
|
||||
*
|
||||
* @param value
|
||||
* value to be output.
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
public void print(final Object value) throws IOException {
|
||||
format.print(value, out, newRecord);
|
||||
newRecord = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints a comment on a new line among the delimiter separated values.
|
||||
*
|
||||
* <p>
|
||||
* Comments will always begin on a new line and occupy at least one full line. The character specified to start
|
||||
* comments and a space will be inserted at the beginning of each new line in the comment.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* If comments are disabled in the current CSV format this method does nothing.
|
||||
* </p>
|
||||
*
|
||||
* <p>This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()}
|
||||
* to start a new line of the comment. Note that this might produce unexpected results for formats that do not use
|
||||
* line breaks as record separator.</p>
|
||||
*
|
||||
* @param comment
|
||||
* the comment to output
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
public void printComment(final String comment) throws IOException {
|
||||
if (!format.isCommentMarkerSet()) {
|
||||
return;
|
||||
}
|
||||
if (!newRecord) {
|
||||
println();
|
||||
}
|
||||
out.append(format.getCommentMarker().charValue());
|
||||
out.append(SP);
|
||||
final int commentLength = comment.length();
|
||||
for (int i = 0; i < commentLength; i++) {
|
||||
final char c = comment.charAt(i);
|
||||
switch (c) {
|
||||
case CR:
|
||||
if (i + 1 < commentLength && comment.charAt(i + 1) == LF) {
|
||||
i++;
|
||||
}
|
||||
//$FALL-THROUGH$ break intentionally excluded.
|
||||
case LF:
|
||||
println();
|
||||
out.append(format.getCommentMarker().charValue());
|
||||
out.append(SP);
|
||||
break;
|
||||
default:
|
||||
out.append(c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
println();
|
||||
}
|
||||
|
||||
/**
|
||||
* Outputs the record separator.
|
||||
*
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
public void println() throws IOException {
|
||||
format.println(out);
|
||||
newRecord = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints the given values as a single record of delimiter separated values followed by the record separator.
|
||||
*
|
||||
* <p>
|
||||
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
|
||||
* separator to the output after printing the record, so there is no need to call {@link #println()}.
|
||||
* </p>
|
||||
*
|
||||
* @param values
|
||||
* values to output.
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
public void printRecord(final Iterable<?> values) throws IOException {
|
||||
for (final Object value : values) {
|
||||
print(value);
|
||||
}
|
||||
println();
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints the given values as a single record of delimiter separated values followed by the record separator.
|
||||
*
|
||||
* <p>
|
||||
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
|
||||
* separator to the output after printing the record, so there is no need to call {@link #println()}.
|
||||
* </p>
|
||||
*
|
||||
* @param values
|
||||
* values to output.
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
public void printRecord(final Object... values) throws IOException {
|
||||
format.printRecord(out, values);
|
||||
newRecord = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints all the objects in the given collection handling nested collections/arrays as records.
|
||||
*
|
||||
* <p>
|
||||
* If the given collection only contains simple objects, this method will print a single record like
|
||||
* {@link #printRecord(Iterable)}. If the given collections contains nested collections/arrays those nested elements
|
||||
* will each be printed as records using {@link #printRecord(Object...)}.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Given the following data structure:
|
||||
* </p>
|
||||
*
|
||||
* <pre>
|
||||
* <code>
|
||||
* List<String[]> data = ...
|
||||
* data.add(new String[]{ "A", "B", "C" });
|
||||
* data.add(new String[]{ "1", "2", "3" });
|
||||
* data.add(new String[]{ "A1", "B2", "C3" });
|
||||
* </code>
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* Calling this method will print:
|
||||
* </p>
|
||||
*
|
||||
* <pre>
|
||||
* <code>
|
||||
* A, B, C
|
||||
* 1, 2, 3
|
||||
* A1, B2, C3
|
||||
* </code>
|
||||
* </pre>
|
||||
*
|
||||
* @param values
|
||||
* the values to print.
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
public void printRecords(final Iterable<?> values) throws IOException {
|
||||
for (final Object value : values) {
|
||||
if (value instanceof Object[]) {
|
||||
this.printRecord((Object[]) value);
|
||||
} else if (value instanceof Iterable) {
|
||||
this.printRecord((Iterable<?>) value);
|
||||
} else {
|
||||
this.printRecord(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints all the objects in the given array handling nested collections/arrays as records.
|
||||
*
|
||||
* <p>
|
||||
* If the given array only contains simple objects, this method will print a single record like
|
||||
* {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested
|
||||
* elements will each be printed as records using {@link #printRecord(Object...)}.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Given the following data structure:
|
||||
* </p>
|
||||
*
|
||||
* <pre>
|
||||
* <code>
|
||||
* String[][] data = new String[3][]
|
||||
* data[0] = String[]{ "A", "B", "C" };
|
||||
* data[1] = new String[]{ "1", "2", "3" };
|
||||
* data[2] = new String[]{ "A1", "B2", "C3" };
|
||||
* </code>
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* Calling this method will print:
|
||||
* </p>
|
||||
*
|
||||
* <pre>
|
||||
* <code>
|
||||
* A, B, C
|
||||
* 1, 2, 3
|
||||
* A1, B2, C3
|
||||
* </code>
|
||||
* </pre>
|
||||
*
|
||||
* @param values
|
||||
* the values to print.
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
public void printRecords(final Object... values) throws IOException {
|
||||
printRecords(Arrays.asList(values));
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints all the objects in the given JDBC result set.
|
||||
*
|
||||
* @param resultSet
|
||||
* result set the values to print.
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
* @throws SQLException
|
||||
* if a database access error occurs
|
||||
*/
|
||||
public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
|
||||
final int columnCount = resultSet.getMetaData().getColumnCount();
|
||||
while (resultSet.next()) {
|
||||
for (int i = 1; i <= columnCount; i++) {
|
||||
final Object object = resultSet.getObject(i);
|
||||
// TODO Who manages the Clob? The JDBC driver or must we close it? Is it driver-dependent?
|
||||
print(object instanceof Clob ? ((Clob) object).getCharacterStream() : object);
|
||||
}
|
||||
println();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,329 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* A CSV record parsed from a CSV file.
|
||||
*
|
||||
* <p>
|
||||
* Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
|
||||
* In version 1.8 the mapping between the column header and the column index was
|
||||
* removed from the serialised state. The class maintains serialization compatibility
|
||||
* with versions pre-1.8 for the record values; these must be accessed by index
|
||||
* following deserialization. There will be loss of any functionality linked to the header
|
||||
* mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa.
|
||||
* </p>
|
||||
*/
|
||||
public final class CSVRecord implements Serializable, Iterable<String> {
|
||||
|
||||
private static final String[] EMPTY_STRING_ARRAY = new String[0];
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private final long characterPosition;
|
||||
|
||||
/** The accumulated comments (if any) */
|
||||
private final String comment;
|
||||
|
||||
/** The record number. */
|
||||
private final long recordNumber;
|
||||
|
||||
/** The values of the record */
|
||||
private final String[] values;
|
||||
|
||||
/** The parser that originates this record. This is not serialized. */
|
||||
private final transient CSVParser parser;
|
||||
|
||||
CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber,
|
||||
final long characterPosition) {
|
||||
this.recordNumber = recordNumber;
|
||||
this.values = values != null ? values : EMPTY_STRING_ARRAY;
|
||||
this.parser = parser;
|
||||
this.comment = comment;
|
||||
this.characterPosition = characterPosition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a value by {@link Enum}.
|
||||
*
|
||||
* @param e
|
||||
* an enum
|
||||
* @return the String at the given enum String
|
||||
*/
|
||||
public String get(final Enum<?> e) {
|
||||
return get(Objects.toString(e, null));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a value by index.
|
||||
*
|
||||
* @param i
|
||||
* a column index (0-based)
|
||||
* @return the String at the given index
|
||||
*/
|
||||
public String get(final int i) {
|
||||
return values[i];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a value by name.
|
||||
*
|
||||
* <p>
|
||||
* Note: This requires a field mapping obtained from the original parser.
|
||||
* A check using {@link #isMapped(String)} should be used to determine if a
|
||||
* mapping exists from the provided {@code name} to a field index. In this case an
|
||||
* exception will only be thrown if the record does not contain a field corresponding
|
||||
* to the mapping, that is the record length is not consistent with the mapping size.
|
||||
* </p>
|
||||
*
|
||||
* @param name
|
||||
* the name of the column to be retrieved.
|
||||
* @return the column value, maybe null depending on {@link CSVFormat#getNullString()}.
|
||||
* @throws IllegalStateException
|
||||
* if no header mapping was provided
|
||||
* @throws IllegalArgumentException
|
||||
* if {@code name} is not mapped or if the record is inconsistent
|
||||
* @see #isMapped(String)
|
||||
* @see #isConsistent()
|
||||
* @see #getParser()
|
||||
* @see CSVFormat#withNullString(String)
|
||||
*/
|
||||
public String get(final String name) {
|
||||
final Map<String, Integer> headerMap = getHeaderMapRaw();
|
||||
if (headerMap == null) {
|
||||
throw new IllegalStateException(
|
||||
"No header mapping was specified, the record values can't be accessed by name");
|
||||
}
|
||||
final Integer index = headerMap.get(name);
|
||||
if (index == null) {
|
||||
throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
|
||||
headerMap.keySet()));
|
||||
}
|
||||
try {
|
||||
return values[index.intValue()];
|
||||
} catch (final ArrayIndexOutOfBoundsException e) {
|
||||
throw new IllegalArgumentException(String.format(
|
||||
"Index for header '%s' is %d but CSVRecord only has %d values!", name, index,
|
||||
Integer.valueOf(values.length)));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the start position of this record as a character position in the source stream. This may or may not
|
||||
* correspond to the byte position depending on the character set.
|
||||
*
|
||||
* @return the position of this record in the source stream.
|
||||
*/
|
||||
public long getCharacterPosition() {
|
||||
return characterPosition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the comment for this record, if any.
|
||||
* Note that comments are attached to the following record.
|
||||
* If there is no following record (i.e. the comment is at EOF)
|
||||
* the comment will be ignored.
|
||||
*
|
||||
* @return the comment for this record, or null if no comment for this record is available.
|
||||
*/
|
||||
public String getComment() {
|
||||
return comment;
|
||||
}
|
||||
|
||||
private Map<String, Integer> getHeaderMapRaw() {
|
||||
return parser == null ? null : parser.getHeaderMapRaw();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the parser.
|
||||
*
|
||||
* <p>
|
||||
* Note: The parser is not part of the serialized state of the record. A null check
|
||||
* should be used when the record may have originated from a serialized form.
|
||||
* </p>
|
||||
*
|
||||
* @return the parser.
|
||||
* @since 1.7
|
||||
*/
|
||||
public CSVParser getParser() {
|
||||
return parser;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of this record in the parsed CSV file.
|
||||
*
|
||||
* <p>
|
||||
* <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
|
||||
* the current line number of the parser that created this record.
|
||||
* </p>
|
||||
*
|
||||
* @return the number of this record.
|
||||
* @see CSVParser#getCurrentLineNumber()
|
||||
*/
|
||||
public long getRecordNumber() {
|
||||
return recordNumber;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether this record has a comment, false otherwise.
|
||||
* Note that comments are attached to the following record.
|
||||
* If there is no following record (i.e. the comment is at EOF)
|
||||
* the comment will be ignored.
|
||||
*
|
||||
* @return true if this record has a comment, false otherwise
|
||||
* @since 1.3
|
||||
*/
|
||||
public boolean hasComment() {
|
||||
return comment != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether the record size matches the header size.
|
||||
*
|
||||
* <p>
|
||||
* Returns true if the sizes for this record match and false if not. Some programs can export files that fail this
|
||||
* test but still produce parsable files.
|
||||
* </p>
|
||||
*
|
||||
* @return true of this record is valid, false if not
|
||||
*/
|
||||
public boolean isConsistent() {
|
||||
final Map<String, Integer> headerMap = getHeaderMapRaw();
|
||||
return headerMap == null || headerMap.size() == values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a given column is mapped, i.e. its name has been defined to the parser.
|
||||
*
|
||||
* @param name
|
||||
* the name of the column to be retrieved.
|
||||
* @return whether a given column is mapped.
|
||||
*/
|
||||
public boolean isMapped(final String name) {
|
||||
final Map<String, Integer> headerMap = getHeaderMapRaw();
|
||||
return headerMap != null && headerMap.containsKey(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a column with given index has a value.
|
||||
*
|
||||
* @param index
|
||||
* a column index (0-based)
|
||||
* @return whether a column with given index has a value
|
||||
*/
|
||||
public boolean isSet(final int index) {
|
||||
return 0 <= index && index < values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a given columns is mapped and has a value.
|
||||
*
|
||||
* @param name
|
||||
* the name of the column to be retrieved.
|
||||
* @return whether a given columns is mapped and has a value
|
||||
*/
|
||||
public boolean isSet(final String name) {
|
||||
return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator over the values of this record.
|
||||
*
|
||||
* @return an iterator over the values of this record.
|
||||
*/
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return toList().iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Puts all values of this record into the given Map.
|
||||
*
|
||||
* @param map
|
||||
* The Map to populate.
|
||||
* @return the given map.
|
||||
* @since 1.9.0
|
||||
*/
|
||||
public <M extends Map<String, String>> M putIn(final M map) {
|
||||
if (getHeaderMapRaw() == null) {
|
||||
return map;
|
||||
}
|
||||
for (final Entry<String, Integer> entry : getHeaderMapRaw().entrySet()) {
|
||||
final int col = entry.getValue().intValue();
|
||||
if (col < values.length) {
|
||||
map.put(entry.getKey(), values[col]);
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of values in this record.
|
||||
*
|
||||
* @return the number of values.
|
||||
*/
|
||||
public int size() {
|
||||
return values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the values to a List.
|
||||
*
|
||||
* TODO: Maybe make this public?
|
||||
*
|
||||
* @return a new List
|
||||
*/
|
||||
private List<String> toList() {
|
||||
return Arrays.asList(values);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies this record into a new Map of header name to record value.
|
||||
*
|
||||
* @return A new Map. The map is empty if the record has no headers.
|
||||
*/
|
||||
public Map<String, String> toMap() {
|
||||
return putIn(new LinkedHashMap<String, String>(values.length));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string representation of the contents of this record. The result is constructed by comment, mapping,
|
||||
* recordNumber and by passing the internal values array to {@link Arrays#toString(Object[])}.
|
||||
*
|
||||
* @return a String representation of this record.
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" +
|
||||
Arrays.toString(values) + "]";
|
||||
}
|
||||
|
||||
String[] values() {
|
||||
return values;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,82 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
/**
|
||||
* Constants for this package.
|
||||
*/
|
||||
final class Constants {

    /** The backslash character. */
    static final char BACKSLASH = '\\';

    /** The backspace control character. */
    static final char BACKSPACE = '\b';

    /** The comma character. */
    static final char COMMA = ',';

    /**
     * Starts a comment, the remainder of the line is the comment.
     */
    static final char COMMENT = '#';

    /** Carriage return. */
    static final char CR = '\r';

    /** RFC 4180 defines line breaks as CRLF */
    static final String CRLF = "\r\n";

    /** The double quote character, boxed. */
    static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"');

    /** The empty string. */
    static final String EMPTY = "";

    /** The end of stream symbol */
    static final int END_OF_STREAM = -1;

    /** Form feed. */
    static final char FF = '\f';

    /** Line feed. */
    static final char LF = '\n';

    /**
     * Unicode line separator.
     */
    static final String LINE_SEPARATOR = "\u2028";

    /**
     * Unicode next line.
     */
    static final String NEXT_LINE = "\u0085";

    /**
     * Unicode paragraph separator.
     */
    static final String PARAGRAPH_SEPARATOR = "\u2029";

    /** The pipe (vertical bar) character. */
    static final char PIPE = '|';

    /** ASCII record separator */
    static final char RS = 30;

    /** The space character. */
    static final char SP = ' ';

    /** The horizontal tab character. */
    static final char TAB = '\t';

    /** Undefined state for the lookahead char */
    static final int UNDEFINED = -2;

    /** ASCII unit separator */
    static final char US = 31;

    /** No instances: this class only holds constants. */
    private Constants() {
        // noop
    }

}
|
|
@ -1,191 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import static org.apache.commons.csv.Constants.CR;
|
||||
import static org.apache.commons.csv.Constants.END_OF_STREAM;
|
||||
import static org.apache.commons.csv.Constants.LF;
|
||||
import static org.apache.commons.csv.Constants.UNDEFINED;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* A special buffered reader which supports sophisticated read access.
|
||||
* <p>
|
||||
* In particular the reader supports a look-ahead option, which allows you to see the next char returned by
|
||||
* {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
|
||||
* </p>
|
||||
*/
|
||||
final class ExtendedBufferedReader extends BufferedReader {
|
||||
|
||||
/** The last char returned */
|
||||
private int lastChar = UNDEFINED;
|
||||
|
||||
/** The count of EOLs (CR/LF/CRLF) seen so far */
|
||||
private long eolCounter;
|
||||
|
||||
/** The position, which is number of characters read so far */
|
||||
private long position;
|
||||
|
||||
private boolean closed;
|
||||
|
||||
/**
|
||||
* Created extended buffered reader using default buffer-size
|
||||
*/
|
||||
ExtendedBufferedReader(final Reader reader) {
|
||||
super(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the stream.
|
||||
*
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
// Set ivars before calling super close() in case close() throws an IOException.
|
||||
closed = true;
|
||||
lastChar = END_OF_STREAM;
|
||||
super.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current line number
|
||||
*
|
||||
* @return the current line number
|
||||
*/
|
||||
long getCurrentLineNumber() {
|
||||
// Check if we are at EOL or EOF or just starting
|
||||
if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
|
||||
return eolCounter; // counter is accurate
|
||||
}
|
||||
return eolCounter + 1; // Allow for counter being incremented only at EOL
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
|
||||
* any of the read methods. This will not include a character read using the {@link #lookAhead()} method. If no
|
||||
* character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached
|
||||
* on the last read then this will return {@link Constants#END_OF_STREAM}.
|
||||
*
|
||||
* @return the last character that was read
|
||||
*/
|
||||
int getLastChar() {
|
||||
return lastChar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the character position in the reader.
|
||||
*
|
||||
* @return the current position in the reader (counting characters, not bytes since this is a Reader)
|
||||
*/
|
||||
long getPosition() {
|
||||
return this.position;
|
||||
}
|
||||
|
||||
public boolean isClosed() {
|
||||
return closed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
|
||||
* still return this value. Does not affect line number or last character.
|
||||
*
|
||||
* @return the next character
|
||||
*
|
||||
* @throws IOException
|
||||
* if there is an error in reading
|
||||
*/
|
||||
int lookAhead() throws IOException {
|
||||
super.mark(1);
|
||||
final int c = super.read();
|
||||
super.reset();
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
final int current = super.read();
|
||||
if (current == CR || current == LF && lastChar != CR) {
|
||||
eolCounter++;
|
||||
}
|
||||
lastChar = current;
|
||||
this.position++;
|
||||
return lastChar;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(final char[] buf, final int offset, final int length) throws IOException {
|
||||
if (length == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
final int len = super.read(buf, offset, length);
|
||||
|
||||
if (len > 0) {
|
||||
|
||||
for (int i = offset; i < offset + len; i++) {
|
||||
final char ch = buf[i];
|
||||
if (ch == LF) {
|
||||
if (CR != (i > 0 ? buf[i - 1] : lastChar)) {
|
||||
eolCounter++;
|
||||
}
|
||||
} else if (ch == CR) {
|
||||
eolCounter++;
|
||||
}
|
||||
}
|
||||
|
||||
lastChar = buf[offset + len - 1];
|
||||
|
||||
} else if (len == -1) {
|
||||
lastChar = END_OF_STREAM;
|
||||
}
|
||||
|
||||
position += len;
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
|
||||
* when processing a comment, otherwise information can be lost.
|
||||
* <p>
|
||||
* Increments {@link #eolCounter}
|
||||
* <p>
|
||||
* Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF
|
||||
*
|
||||
* @return the line that was read, or null if reached EOF.
|
||||
*/
|
||||
@Override
|
||||
public String readLine() throws IOException {
|
||||
final String line = super.readLine();
|
||||
|
||||
if (line != null) {
|
||||
lastChar = LF; // needed for detecting start of line
|
||||
eolCounter++;
|
||||
} else {
|
||||
lastChar = END_OF_STREAM;
|
||||
}
|
||||
|
||||
return line;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,139 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.Writer;
|
||||
import java.nio.CharBuffer;
|
||||
|
||||
/** Copied from Apache Commons IO. */
|
||||
final class IOUtils {

    /**
     * <p>
     * Copied from Apache Commons IO.
     * </p>
     * The default buffer size ({@value}).
     */
    static final int DEFAULT_BUFFER_SIZE = 1024 * 4;

    /**
     * <p>
     * Copied from Apache Commons IO.
     * </p>
     * Represents the end-of-file (or stream).
     */
    private static final int EOF = -1;

    /** No instances: this class only offers static helpers. */
    private IOUtils() {
        // noop
    }

    /**
     * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}.
     * <p>
     * This method buffers the input internally, so there is no need to use a
     * {@code BufferedReader}.
     * </p>
     * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}.
     *
     * @param input the {@code Reader} to read from
     * @param output the {@code Appendable} to append to
     * @return the number of characters copied
     * @throws NullPointerException if the input or output is null
     * @throws IOException if an I/O error occurs
     * @since 2.7
     */
    static long copy(final Reader input, final Appendable output) throws IOException {
        return copy(input, output, CharBuffer.allocate(DEFAULT_BUFFER_SIZE));
    }

    /**
     * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}.
     * <p>
     * This method uses the provided buffer, so there is no need to use a
     * {@code BufferedReader}.
     * </p>
     *
     * @param input the {@code Reader} to read from
     * @param output the {@code Appendable} to write to
     * @param buffer the buffer to be used for the copy
     * @return the number of characters copied
     * @throws NullPointerException if the input or output is null
     * @throws IOException if an I/O error occurs
     * @since 2.7
     */
    static long copy(final Reader input, final Appendable output, final CharBuffer buffer) throws IOException {
        long count = 0;
        int n;
        while (EOF != (n = input.read(buffer))) {
            // Switch the buffer from filling to draining so the chunk just read starts at index 0.
            buffer.flip();
            output.append(buffer, 0, n);
            count += n;
            // Restore the full capacity for the next read. Without this the limit stays at the size
            // of the chunk just read, so a single short read would throttle every later read.
            buffer.clear();
        }
        return count;
    }

    /**
     * <p>
     * Copied from Apache Commons IO.
     * </p>
     * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}.
     * <p>
     * This method buffers the input internally, so there is no need to use a
     * {@code BufferedReader}.
     * </p>
     * <p>
     * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}.
     * </p>
     *
     * @param input the {@code Reader} to read from
     * @param output the {@code Writer} to write to
     * @return the number of characters copied
     * @throws NullPointerException if the input or output is null
     * @throws IOException if an I/O error occurs
     * @since 1.3
     */
    static long copyLarge(final Reader input, final Writer output) throws IOException {
        return copyLarge(input, output, new char[DEFAULT_BUFFER_SIZE]);
    }

    /**
     * <p>
     * Copied from Apache Commons IO.
     * </p>
     * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}.
     * <p>
     * This method uses the provided buffer, so there is no need to use a
     * {@code BufferedReader}.
     * </p>
     *
     * @param input the {@code Reader} to read from
     * @param output the {@code Writer} to write to
     * @param buffer the buffer to be used for the copy
     * @return the number of characters copied
     * @throws NullPointerException if the input or output is null
     * @throws IOException if an I/O error occurs
     * @since 2.2
     */
    static long copyLarge(final Reader input, final Writer output, final char[] buffer) throws IOException {
        long count = 0;
        int n;
        while (EOF != (n = input.read(buffer))) {
            output.write(buffer, 0, n);
            count += n;
        }
        return count;
    }

}
|
|
@ -1,461 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import static org.apache.commons.csv.Constants.BACKSPACE;
|
||||
import static org.apache.commons.csv.Constants.CR;
|
||||
import static org.apache.commons.csv.Constants.END_OF_STREAM;
|
||||
import static org.apache.commons.csv.Constants.FF;
|
||||
import static org.apache.commons.csv.Constants.LF;
|
||||
import static org.apache.commons.csv.Constants.TAB;
|
||||
import static org.apache.commons.csv.Constants.UNDEFINED;
|
||||
import static org.apache.commons.csv.Token.Type.COMMENT;
|
||||
import static org.apache.commons.csv.Token.Type.EOF;
|
||||
import static org.apache.commons.csv.Token.Type.EORECORD;
|
||||
import static org.apache.commons.csv.Token.Type.INVALID;
|
||||
import static org.apache.commons.csv.Token.Type.TOKEN;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Lexical analyzer.
|
||||
*/
|
||||
final class Lexer implements Closeable {
|
||||
|
||||
private static final String CR_STRING = Character.toString(CR);
|
||||
private static final String LF_STRING = Character.toString(LF);
|
||||
|
||||
/**
|
||||
* Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it
|
||||
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
|
||||
* chars (using surrogates) and thus there should never be a collision with a real text char.
|
||||
*/
|
||||
private static final char DISABLED = '\ufffe';
|
||||
|
||||
private final char delimiter;
|
||||
private final char escape;
|
||||
private final char quoteChar;
|
||||
private final char commentStart;
|
||||
|
||||
private final boolean ignoreSurroundingSpaces;
|
||||
private final boolean ignoreEmptyLines;
|
||||
|
||||
/** The input stream */
|
||||
private final ExtendedBufferedReader reader;
|
||||
private String firstEol;
|
||||
|
||||
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
|
||||
this.reader = reader;
|
||||
this.delimiter = format.getDelimiter();
|
||||
this.escape = mapNullToDisabled(format.getEscapeCharacter());
|
||||
this.quoteChar = mapNullToDisabled(format.getQuoteCharacter());
|
||||
this.commentStart = mapNullToDisabled(format.getCommentMarker());
|
||||
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
|
||||
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes resources.
|
||||
*
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current character position
|
||||
*
|
||||
* @return the current character position
|
||||
*/
|
||||
long getCharacterPosition() {
|
||||
return reader.getPosition();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current line number
|
||||
*
|
||||
* @return the current line number
|
||||
*/
|
||||
long getCurrentLineNumber() {
|
||||
return reader.getCurrentLineNumber();
|
||||
}
|
||||
|
||||
String getFirstEol(){
|
||||
return firstEol;
|
||||
}
|
||||
|
||||
boolean isClosed() {
|
||||
return reader.isClosed();
|
||||
}
|
||||
|
||||
boolean isCommentStart(final int ch) {
|
||||
return ch == commentStart;
|
||||
}
|
||||
|
||||
boolean isDelimiter(final int ch) {
|
||||
return ch == delimiter;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given character indicates end of file
|
||||
*/
|
||||
boolean isEndOfFile(final int ch) {
|
||||
return ch == END_OF_STREAM;
|
||||
}
|
||||
|
||||
boolean isEscape(final int ch) {
|
||||
return ch == escape;
|
||||
}
|
||||
|
||||
private boolean isMetaChar(final int ch) {
|
||||
return ch == delimiter ||
|
||||
ch == escape ||
|
||||
ch == quoteChar ||
|
||||
ch == commentStart;
|
||||
}
|
||||
|
||||
boolean isQuoteChar(final int ch) {
|
||||
return ch == quoteChar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
|
||||
*
|
||||
* @param ch the character to check
|
||||
* @return true if the character is at the start of a line.
|
||||
*/
|
||||
boolean isStartOfLine(final int ch) {
|
||||
return ch == LF || ch == CR || ch == UNDEFINED;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given char is a whitespace character
|
||||
*/
|
||||
boolean isWhitespace(final int ch) {
|
||||
return !isDelimiter(ch) && Character.isWhitespace((char) ch);
|
||||
}
|
||||
|
||||
private char mapNullToDisabled(final Character c) {
|
||||
return c == null ? DISABLED : c.charValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next token.
|
||||
* <p>
|
||||
* A token corresponds to a term, a record change or an end-of-file indicator.
|
||||
* </p>
|
||||
*
|
||||
* @param token
|
||||
* an existing Token object to reuse. The caller is responsible to initialize the Token.
|
||||
* @return the next token found
|
||||
* @throws java.io.IOException
|
||||
* on stream access error
|
||||
*/
|
||||
Token nextToken(final Token token) throws IOException {
|
||||
|
||||
// get the last read char (required for empty line detection)
|
||||
int lastChar = reader.getLastChar();
|
||||
|
||||
// read the next char and set eol
|
||||
int c = reader.read();
|
||||
/*
|
||||
* Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF
|
||||
* - they are equivalent here.
|
||||
*/
|
||||
boolean eol = readEndOfLine(c);
|
||||
|
||||
// empty line detection: eol AND (last char was EOL or beginning)
|
||||
if (ignoreEmptyLines) {
|
||||
while (eol && isStartOfLine(lastChar)) {
|
||||
// go on char ahead ...
|
||||
lastChar = c;
|
||||
c = reader.read();
|
||||
eol = readEndOfLine(c);
|
||||
// reached end of file without any content (empty line at the end)
|
||||
if (isEndOfFile(c)) {
|
||||
token.type = EOF;
|
||||
// don't set token.isReady here because no content
|
||||
return token;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// did we reach eof during the last iteration already ? EOF
|
||||
if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) {
|
||||
token.type = EOF;
|
||||
// don't set token.isReady here because no content
|
||||
return token;
|
||||
}
|
||||
|
||||
if (isStartOfLine(lastChar) && isCommentStart(c)) {
|
||||
final String line = reader.readLine();
|
||||
if (line == null) {
|
||||
token.type = EOF;
|
||||
// don't set token.isReady here because no content
|
||||
return token;
|
||||
}
|
||||
final String comment = line.trim();
|
||||
token.content.append(comment);
|
||||
token.type = COMMENT;
|
||||
return token;
|
||||
}
|
||||
|
||||
// important: make sure a new char gets consumed in each iteration
|
||||
while (token.type == INVALID) {
|
||||
// ignore whitespaces at beginning of a token
|
||||
if (ignoreSurroundingSpaces) {
|
||||
while (isWhitespace(c) && !eol) {
|
||||
c = reader.read();
|
||||
eol = readEndOfLine(c);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, start of token reached: encapsulated, or token
|
||||
if (isDelimiter(c)) {
|
||||
// empty token return TOKEN("")
|
||||
token.type = TOKEN;
|
||||
} else if (eol) {
|
||||
// empty token return EORECORD("")
|
||||
// noop: token.content.append("");
|
||||
token.type = EORECORD;
|
||||
} else if (isQuoteChar(c)) {
|
||||
// consume encapsulated token
|
||||
parseEncapsulatedToken(token);
|
||||
} else if (isEndOfFile(c)) {
|
||||
// end of file return EOF()
|
||||
// noop: token.content.append("");
|
||||
token.type = EOF;
|
||||
token.isReady = true; // there is data at EOF
|
||||
} else {
|
||||
// next token must be a simple token
|
||||
// add removed blanks when not ignoring whitespace chars...
|
||||
parseSimpleToken(token, c);
|
||||
}
|
||||
}
|
||||
return token;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an encapsulated token.
|
||||
* <p/>
|
||||
* Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included
|
||||
* in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
|
||||
* an encapsulated token are ignored. The token is finished when one of the following conditions become true:
|
||||
* <ul>
|
||||
* <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:</li>
|
||||
* <ul>
|
||||
* <li>delimiter (TOKEN)</li>
|
||||
* <li>end of line (EORECORD)</li>
|
||||
* </ul>
|
||||
* <li>end of stream has been reached (EOF)</li> </ul>
|
||||
*
|
||||
* @param token
|
||||
* the current token
|
||||
* @return a valid token object
|
||||
* @throws IOException
|
||||
* on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL
|
||||
*/
|
||||
private Token parseEncapsulatedToken(final Token token) throws IOException {
|
||||
// save current line number in case needed for IOE
|
||||
final long startLineNumber = getCurrentLineNumber();
|
||||
int c;
|
||||
while (true) {
|
||||
c = reader.read();
|
||||
|
||||
if (isEscape(c)) {
|
||||
final int unescaped = readEscape();
|
||||
if (unescaped == END_OF_STREAM) { // unexpected char after escape
|
||||
token.content.append((char) c).append((char) reader.getLastChar());
|
||||
} else {
|
||||
token.content.append((char) unescaped);
|
||||
}
|
||||
} else if (isQuoteChar(c)) {
|
||||
if (isQuoteChar(reader.lookAhead())) {
|
||||
// double or escaped encapsulator -> add single encapsulator to token
|
||||
c = reader.read();
|
||||
token.content.append((char) c);
|
||||
} else {
|
||||
// token finish mark (encapsulator) reached: ignore whitespace till delimiter
|
||||
while (true) {
|
||||
c = reader.read();
|
||||
if (isDelimiter(c)) {
|
||||
token.type = TOKEN;
|
||||
return token;
|
||||
} else if (isEndOfFile(c)) {
|
||||
token.type = EOF;
|
||||
token.isReady = true; // There is data at EOF
|
||||
return token;
|
||||
} else if (readEndOfLine(c)) {
|
||||
token.type = EORECORD;
|
||||
return token;
|
||||
} else if (!isWhitespace(c)) {
|
||||
// error invalid char between token and next delimiter
|
||||
throw new IOException("(line " + getCurrentLineNumber() +
|
||||
") invalid char between encapsulated token and delimiter");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (isEndOfFile(c)) {
|
||||
// error condition (end of file before end of token)
|
||||
throw new IOException("(startline " + startLineNumber +
|
||||
") EOF reached before encapsulated token finished");
|
||||
} else {
|
||||
// consume character
|
||||
token.content.append((char) c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a simple token.
|
||||
* <p/>
|
||||
* Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped
|
||||
* delimiters (as \, or \;). The token is finished when one of the following conditions become true:
|
||||
* <ul>
|
||||
* <li>end of line has been reached (EORECORD)</li>
|
||||
* <li>end of stream has been reached (EOF)</li>
|
||||
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param token
|
||||
* the current token
|
||||
* @param ch
|
||||
* the current character
|
||||
* @return the filled token
|
||||
* @throws IOException
|
||||
* on stream access error
|
||||
*/
|
||||
private Token parseSimpleToken(final Token token, int ch) throws IOException {
|
||||
// Faster to use while(true)+break than while(token.type == INVALID)
|
||||
while (true) {
|
||||
if (readEndOfLine(ch)) {
|
||||
token.type = EORECORD;
|
||||
break;
|
||||
} else if (isEndOfFile(ch)) {
|
||||
token.type = EOF;
|
||||
token.isReady = true; // There is data at EOF
|
||||
break;
|
||||
} else if (isDelimiter(ch)) {
|
||||
token.type = TOKEN;
|
||||
break;
|
||||
} else if (isEscape(ch)) {
|
||||
final int unescaped = readEscape();
|
||||
if (unescaped == END_OF_STREAM) { // unexpected char after escape
|
||||
token.content.append((char) ch).append((char) reader.getLastChar());
|
||||
} else {
|
||||
token.content.append((char) unescaped);
|
||||
}
|
||||
ch = reader.read(); // continue
|
||||
} else {
|
||||
token.content.append((char) ch);
|
||||
ch = reader.read(); // continue
|
||||
}
|
||||
}
|
||||
|
||||
if (ignoreSurroundingSpaces) {
|
||||
trimTrailingSpaces(token.content);
|
||||
}
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
/**
|
||||
* Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character...
|
||||
*
|
||||
* @return true if the given or next character is a line-terminator
|
||||
*/
|
||||
boolean readEndOfLine(int ch) throws IOException {
|
||||
// check if we have \r\n...
|
||||
if (ch == CR && reader.lookAhead() == LF) {
|
||||
// note: does not change ch outside of this method!
|
||||
ch = reader.read();
|
||||
// Save the EOL state
|
||||
if (firstEol == null) {
|
||||
this.firstEol = Constants.CRLF;
|
||||
}
|
||||
}
|
||||
// save EOL state here.
|
||||
if (firstEol == null) {
|
||||
if (ch == LF) {
|
||||
this.firstEol = LF_STRING;
|
||||
} else if (ch == CR) {
|
||||
this.firstEol = CR_STRING;
|
||||
}
|
||||
}
|
||||
|
||||
return ch == LF || ch == CR;
|
||||
}
|
||||
|
||||
// TODO escape handling needs more work
|
||||
/**
|
||||
* Handle an escape sequence.
|
||||
* The current character must be the escape character.
|
||||
* On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()}
|
||||
* on the input stream.
|
||||
*
|
||||
* @return the unescaped character (as an int) or {@link Constants#END_OF_STREAM} if char following the escape is
|
||||
* invalid.
|
||||
* @throws IOException if there is a problem reading the stream or the end of stream is detected:
|
||||
* the escape character is not allowed at end of stream
|
||||
*/
|
||||
int readEscape() throws IOException {
|
||||
// the escape char has just been read (normally a backslash)
|
||||
final int ch = reader.read();
|
||||
switch (ch) {
|
||||
case 'r':
|
||||
return CR;
|
||||
case 'n':
|
||||
return LF;
|
||||
case 't':
|
||||
return TAB;
|
||||
case 'b':
|
||||
return BACKSPACE;
|
||||
case 'f':
|
||||
return FF;
|
||||
case CR:
|
||||
case LF:
|
||||
case FF: // TODO is this correct?
|
||||
case TAB: // TODO is this correct? Do tabs need to be escaped?
|
||||
case BACKSPACE: // TODO is this correct?
|
||||
return ch;
|
||||
case END_OF_STREAM:
|
||||
throw new IOException("EOF whilst processing escape sequence");
|
||||
default:
|
||||
// Now check for meta-characters
|
||||
if (isMetaChar(ch)) {
|
||||
return ch;
|
||||
}
|
||||
// indicate unexpected char - available from in.getLastChar()
|
||||
return END_OF_STREAM;
|
||||
}
|
||||
}
|
||||
|
||||
void trimTrailingSpaces(final StringBuilder buffer) {
|
||||
int length = buffer.length();
|
||||
while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
|
||||
length = length - 1;
|
||||
}
|
||||
if (length != buffer.length()) {
|
||||
buffer.setLength(length);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.csv;
|
||||
|
||||
/**
 * Defines the quoting behavior used when printing values.
 */
public enum QuoteMode {

    /**
     * Every field is quoted.
     */
    ALL,

    /**
     * Every field that is not null is quoted.
     */
    ALL_NON_NULL,

    /**
     * Only fields containing special characters are quoted, such as the field delimiter, the quote character or
     * any of the characters in the line separator string.
     */
    MINIMAL,

    /**
     * Every field that is not numeric is quoted.
     */
    NON_NUMERIC,

    /**
     * Fields are never quoted. When the delimiter occurs in data, the printer prefixes it with the escape
     * character. If the escape character is not set, format validation throws an exception.
     */
    NONE
}
|
|
@ -1,73 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import static org.apache.commons.csv.Token.Type.INVALID;
|
||||
|
||||
/**
 * Internal token representation.
 * <p>
 * It is used as contract between the lexer and the parser.
 * </p>
 */
final class Token {

    /** The kinds of token the lexer can hand to the parser. */
    enum Type {
        /** Token has no valid content, i.e. is in its initialized state. */
        INVALID,

        /** Token with content, at beginning or in the middle of a line. */
        TOKEN,

        /** Token (which can have content) when the end of file is reached. */
        EOF,

        /** Token with content when the end of a line is reached. */
        EORECORD,

        /** Token is a comment line. */
        COMMENT
    }

    /** Initial capacity of the content buffer. */
    private static final int INITIAL_TOKEN_LENGTH = 50;

    /** Token type; a fresh or reset token is {@link Type#INVALID}. */
    Token.Type type = Type.INVALID;

    /** The content buffer. */
    final StringBuilder content = new StringBuilder(INITIAL_TOKEN_LENGTH);

    /** Token ready flag: indicates a valid token with content (ready for the parser). */
    boolean isReady;

    /**
     * Returns this token to its initial state: empty content, type {@link Type#INVALID}, not ready.
     */
    void reset() {
        isReady = false;
        type = Type.INVALID;
        content.setLength(0);
    }

    /**
     * Eases IDE debugging.
     *
     * @return the type name followed by the content in square brackets.
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder(type.name());
        text.append(" [").append(content).append("]");
        return text.toString();
    }
}
|
|
@ -1,82 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Apache Commons CSV Format Support.
|
||||
*
|
||||
* <p>CSV are widely used as interfaces to legacy systems or manual data-imports.
|
||||
* CSV stands for "Comma Separated Values" (or sometimes "Character Separated
|
||||
* Values"). The CSV data format is defined in
|
||||
* <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a>
|
||||
* but many dialects exist.</p>
|
||||
*
|
||||
* <p>Common to all file dialects is its basic structure: The CSV data-format
|
||||
* is record oriented, whereas each record starts on a new textual line. A
|
||||
* record is built from a list of values. Keep in mind that not all records
|
||||
* must have an equal number of values:</p>
|
||||
* <pre>
|
||||
* csv := records*
|
||||
* record := values*
|
||||
* </pre>
|
||||
*
|
||||
* <p>The following list contains the CSV aspects the Commons CSV parser supports:</p>
|
||||
* <dl>
|
||||
* <dt>Separators (for lines)</dt>
|
||||
* <dd>The record separators are hardcoded and cannot be changed. They must be '\r', '\n' or '\r\n'.</dd>
|
||||
*
|
||||
* <dt>Delimiter (for values)</dt>
|
||||
* <dd>The delimiter for values is freely configurable (default ',').</dd>
|
||||
*
|
||||
* <dt>Comments</dt>
|
||||
* <dd>Some CSV-dialects support a simple comment syntax. A comment is a record
|
||||
* which must start with a designated character (the commentStarter). A record
|
||||
* of this kind is treated as comment and gets removed from the input (default none)</dd>
|
||||
*
|
||||
* <dt>Encapsulator</dt>
|
||||
* <dd>Two encapsulator characters (default '"') are used to enclose -&gt; complex values.</dd>
|
||||
*
|
||||
* <dt>Simple values</dt>
|
||||
* <dd>A simple value consists of all characters (except the delimiter) until
|
||||
* (but not including) the next delimiter or a record-terminator. Optionally
|
||||
* all surrounding whitespaces of a simple value can be ignored (default: true).</dd>
|
||||
*
|
||||
* <dt>Complex values</dt>
|
||||
* <dd>Complex values are encapsulated within a pair of the defined encapsulator characters.
|
||||
* The encapsulator itself must be escaped or doubled when used inside complex values.
|
||||
* Complex values preserve all kinds of formatting (including newlines -&gt; multiline-values)</dd>
|
||||
*
|
||||
* <dt>Empty line skipping</dt>
|
||||
* <dd>Optionally empty lines in CSV files can be skipped.
|
||||
* Otherwise, empty lines will return a record with a single empty value.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* <p>In addition to individually defined dialects, two predefined dialects (strict-csv, and excel-csv)
|
||||
* can be set directly.</p> <!-- TODO fix -->
|
||||
*
|
||||
* <p>Example usage:</p>
|
||||
* <blockquote><pre>
|
||||
* Reader in = new StringReader("a,b,c");
|
||||
* for (CSVRecord record : CSVFormat.DEFAULT.parse(in)) {
|
||||
* for (String field : record) {
|
||||
* System.out.print("\"" + field + "\", ");
|
||||
* }
|
||||
* System.out.println();
|
||||
* }
|
||||
* </pre></blockquote>
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
Loading…
Reference in New Issue