Sort methods in alphabetical (AB) order.
This commit is contained in:
parent
abd7de4283
commit
21f4f584ba
|
@ -184,6 +184,26 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Header information based on name and position.
|
||||
*/
|
||||
private static final class Headers {
|
||||
/**
|
||||
* Header column positions (0-based)
|
||||
*/
|
||||
final Map<String, Integer> headerMap;
|
||||
|
||||
/**
|
||||
* Header names in column order
|
||||
*/
|
||||
final List<String> headerNames;
|
||||
|
||||
Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
|
||||
this.headerMap = headerMap;
|
||||
this.headerNames = headerNames;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a parser for the given {@link File}.
|
||||
*
|
||||
|
@ -281,6 +301,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
return new CSVParser(reader, format);
|
||||
}
|
||||
|
||||
// the following objects are shared to reduce garbage
|
||||
|
||||
/**
|
||||
* Creates a parser for the given {@link String}.
|
||||
*
|
||||
|
@ -301,8 +323,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
return new CSVParser(new StringReader(string), format);
|
||||
}
|
||||
|
||||
// the following objects are shared to reduce garbage
|
||||
|
||||
/**
|
||||
* Creates and returns a parser for the given URL, which the caller MUST close.
|
||||
*
|
||||
|
@ -448,26 +468,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
new LinkedHashMap<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Header information based on name and position.
|
||||
*/
|
||||
private static final class Headers {
|
||||
/**
|
||||
* Header column positions (0-based)
|
||||
*/
|
||||
final Map<String, Integer> headerMap;
|
||||
|
||||
/**
|
||||
* Header names in column order
|
||||
*/
|
||||
final List<String> headerNames;
|
||||
|
||||
Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
|
||||
this.headerMap = headerMap;
|
||||
this.headerNames = headerNames;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the name to index mapping if the format defines a header.
|
||||
*
|
||||
|
|
|
@ -228,17 +228,6 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
return headerMap != null && headerMap.containsKey(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a given column is mapped and has a value.
|
||||
*
|
||||
* @param name
|
||||
* the name of the column to be retrieved.
|
||||
* @return whether a given column is mapped and has a value
|
||||
*/
|
||||
public boolean isSet(final String name) {
|
||||
return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a column with given index has a value.
|
||||
*
|
||||
|
@ -250,6 +239,17 @@ public final class CSVRecord implements Serializable, Iterable<String> {
|
|||
return 0 <= index && index < values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether a given column is mapped and has a value.
|
||||
*
|
||||
* @param name
|
||||
* the name of the column to be retrieved.
|
||||
* @return whether a given column is mapped and has a value
|
||||
*/
|
||||
public boolean isSet(final String name) {
|
||||
return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator over the values of this record.
|
||||
*
|
||||
|
|
|
@ -53,15 +53,31 @@ final class ExtendedBufferedReader extends BufferedReader {
|
|||
super(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the stream.
|
||||
*
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
final int current = super.read();
|
||||
if (current == CR || current == LF && lastChar != CR) {
|
||||
eolCounter++;
|
||||
public void close() throws IOException {
|
||||
// Set ivars before calling super close() in case close() throws an IOException.
|
||||
closed = true;
|
||||
lastChar = END_OF_STREAM;
|
||||
super.close();
|
||||
}
|
||||
lastChar = current;
|
||||
this.position++;
|
||||
return lastChar;
|
||||
|
||||
/**
|
||||
* Returns the current line number
|
||||
*
|
||||
* @return the current line number
|
||||
*/
|
||||
long getCurrentLineNumber() {
|
||||
// Check if we are at EOL or EOF or just starting
|
||||
if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
|
||||
return eolCounter; // counter is accurate
|
||||
}
|
||||
return eolCounter + 1; // Allow for counter being incremented only at EOL
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -76,6 +92,47 @@ final class ExtendedBufferedReader extends BufferedReader {
|
|||
return lastChar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the character position in the reader.
|
||||
*
|
||||
* @return the current position in the reader (counting characters, not bytes since this is a Reader)
|
||||
*/
|
||||
long getPosition() {
|
||||
return this.position;
|
||||
}
|
||||
|
||||
public boolean isClosed() {
|
||||
return closed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
|
||||
* still return this value. Does not affect line number or last character.
|
||||
*
|
||||
* @return the next character
|
||||
*
|
||||
* @throws IOException
|
||||
* if there is an error in reading
|
||||
*/
|
||||
int lookAhead() throws IOException {
|
||||
super.mark(1);
|
||||
final int c = super.read();
|
||||
super.reset();
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read() throws IOException {
|
||||
final int current = super.read();
|
||||
if (current == CR || current == LF && lastChar != CR) {
|
||||
eolCounter++;
|
||||
}
|
||||
lastChar = current;
|
||||
this.position++;
|
||||
return lastChar;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(final char[] buf, final int offset, final int length) throws IOException {
|
||||
if (length == 0) {
|
||||
|
@ -131,61 +188,4 @@ final class ExtendedBufferedReader extends BufferedReader {
|
|||
return line;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
|
||||
* still return this value. Does not affect line number or last character.
|
||||
*
|
||||
* @return the next character
|
||||
*
|
||||
* @throws IOException
|
||||
* if there is an error in reading
|
||||
*/
|
||||
int lookAhead() throws IOException {
|
||||
super.mark(1);
|
||||
final int c = super.read();
|
||||
super.reset();
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current line number
|
||||
*
|
||||
* @return the current line number
|
||||
*/
|
||||
long getCurrentLineNumber() {
|
||||
// Check if we are at EOL or EOF or just starting
|
||||
if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
|
||||
return eolCounter; // counter is accurate
|
||||
}
|
||||
return eolCounter + 1; // Allow for counter being incremented only at EOL
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the character position in the reader.
|
||||
*
|
||||
* @return the current position in the reader (counting characters, not bytes since this is a Reader)
|
||||
*/
|
||||
long getPosition() {
|
||||
return this.position;
|
||||
}
|
||||
|
||||
public boolean isClosed() {
|
||||
return closed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the stream.
|
||||
*
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
// Set ivars before calling super close() in case close() throws an IOException.
|
||||
closed = true;
|
||||
lastChar = END_OF_STREAM;
|
||||
super.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -60,10 +60,6 @@ final class Lexer implements Closeable {
|
|||
private final ExtendedBufferedReader reader;
|
||||
private String firstEol;
|
||||
|
||||
String getFirstEol(){
|
||||
return firstEol;
|
||||
}
|
||||
|
||||
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
|
||||
this.reader = reader;
|
||||
this.delimiter = format.getDelimiter();
|
||||
|
@ -74,6 +70,94 @@ final class Lexer implements Closeable {
|
|||
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes resources.
|
||||
*
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current character position
|
||||
*
|
||||
* @return the current character position
|
||||
*/
|
||||
long getCharacterPosition() {
|
||||
return reader.getPosition();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current line number
|
||||
*
|
||||
* @return the current line number
|
||||
*/
|
||||
long getCurrentLineNumber() {
|
||||
return reader.getCurrentLineNumber();
|
||||
}
|
||||
|
||||
String getFirstEol(){
|
||||
return firstEol;
|
||||
}
|
||||
|
||||
boolean isClosed() {
|
||||
return reader.isClosed();
|
||||
}
|
||||
|
||||
boolean isCommentStart(final int ch) {
|
||||
return ch == commentStart;
|
||||
}
|
||||
|
||||
boolean isDelimiter(final int ch) {
|
||||
return ch == delimiter;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given character indicates end of file
|
||||
*/
|
||||
boolean isEndOfFile(final int ch) {
|
||||
return ch == END_OF_STREAM;
|
||||
}
|
||||
|
||||
boolean isEscape(final int ch) {
|
||||
return ch == escape;
|
||||
}
|
||||
|
||||
private boolean isMetaChar(final int ch) {
|
||||
return ch == delimiter ||
|
||||
ch == escape ||
|
||||
ch == quoteChar ||
|
||||
ch == commentStart;
|
||||
}
|
||||
|
||||
boolean isQuoteChar(final int ch) {
|
||||
return ch == quoteChar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
|
||||
*
|
||||
* @param ch the character to check
|
||||
* @return true if the character is at the start of a line.
|
||||
*/
|
||||
boolean isStartOfLine(final int ch) {
|
||||
return ch == LF || ch == CR || ch == UNDEFINED;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given char is a whitespace character
|
||||
*/
|
||||
boolean isWhitespace(final int ch) {
|
||||
return !isDelimiter(ch) && Character.isWhitespace((char) ch);
|
||||
}
|
||||
|
||||
private char mapNullToDisabled(final Character c) {
|
||||
return c == null ? DISABLED : c.charValue();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next token.
|
||||
* <p>
|
||||
|
@ -170,59 +254,6 @@ final class Lexer implements Closeable {
|
|||
return token;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a simple token.
|
||||
* <p>
|
||||
* Simple tokens are tokens which are not surrounded by encapsulators. A simple token might contain escaped
|
||||
* delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
|
||||
* <ul>
|
||||
* <li>end of line has been reached (EORECORD)</li>
|
||||
* <li>end of stream has been reached (EOF)</li>
|
||||
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param token
|
||||
* the current token
|
||||
* @param ch
|
||||
* the current character
|
||||
* @return the filled token
|
||||
* @throws IOException
|
||||
* on stream access error
|
||||
*/
|
||||
private Token parseSimpleToken(final Token token, int ch) throws IOException {
|
||||
// Faster to use while(true)+break than while(token.type == INVALID)
|
||||
while (true) {
|
||||
if (readEndOfLine(ch)) {
|
||||
token.type = EORECORD;
|
||||
break;
|
||||
} else if (isEndOfFile(ch)) {
|
||||
token.type = EOF;
|
||||
token.isReady = true; // There is data at EOF
|
||||
break;
|
||||
} else if (isDelimiter(ch)) {
|
||||
token.type = TOKEN;
|
||||
break;
|
||||
} else if (isEscape(ch)) {
|
||||
final int unescaped = readEscape();
|
||||
if (unescaped == END_OF_STREAM) { // unexpected char after escape
|
||||
token.content.append((char) ch).append((char) reader.getLastChar());
|
||||
} else {
|
||||
token.content.append((char) unescaped);
|
||||
}
|
||||
ch = reader.read(); // continue
|
||||
} else {
|
||||
token.content.append((char) ch);
|
||||
ch = reader.read(); // continue
|
||||
}
|
||||
}
|
||||
|
||||
if (ignoreSurroundingSpaces) {
|
||||
trimTrailingSpaces(token.content);
|
||||
}
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an encapsulated token.
|
||||
* <p>
|
||||
|
@ -294,26 +325,84 @@ final class Lexer implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
private char mapNullToDisabled(final Character c) {
|
||||
return c == null ? DISABLED : c.charValue();
|
||||
/**
|
||||
* Parses a simple token.
|
||||
* <p>
|
||||
* Simple tokens are tokens which are not surrounded by encapsulators. A simple token might contain escaped
|
||||
* delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
|
||||
* <ul>
|
||||
* <li>end of line has been reached (EORECORD)</li>
|
||||
* <li>end of stream has been reached (EOF)</li>
|
||||
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param token
|
||||
* the current token
|
||||
* @param ch
|
||||
* the current character
|
||||
* @return the filled token
|
||||
* @throws IOException
|
||||
* on stream access error
|
||||
*/
|
||||
private Token parseSimpleToken(final Token token, int ch) throws IOException {
|
||||
// Faster to use while(true)+break than while(token.type == INVALID)
|
||||
while (true) {
|
||||
if (readEndOfLine(ch)) {
|
||||
token.type = EORECORD;
|
||||
break;
|
||||
} else if (isEndOfFile(ch)) {
|
||||
token.type = EOF;
|
||||
token.isReady = true; // There is data at EOF
|
||||
break;
|
||||
} else if (isDelimiter(ch)) {
|
||||
token.type = TOKEN;
|
||||
break;
|
||||
} else if (isEscape(ch)) {
|
||||
final int unescaped = readEscape();
|
||||
if (unescaped == END_OF_STREAM) { // unexpected char after escape
|
||||
token.content.append((char) ch).append((char) reader.getLastChar());
|
||||
} else {
|
||||
token.content.append((char) unescaped);
|
||||
}
|
||||
ch = reader.read(); // continue
|
||||
} else {
|
||||
token.content.append((char) ch);
|
||||
ch = reader.read(); // continue
|
||||
}
|
||||
}
|
||||
|
||||
if (ignoreSurroundingSpaces) {
|
||||
trimTrailingSpaces(token.content);
|
||||
}
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current line number
|
||||
* Greedily accepts \n, \r and \r\n. For a \r\n pair, this check silently consumes the second control character.
|
||||
*
|
||||
* @return the current line number
|
||||
* @return true if the given or next character is a line-terminator
|
||||
*/
|
||||
long getCurrentLineNumber() {
|
||||
return reader.getCurrentLineNumber();
|
||||
boolean readEndOfLine(int ch) throws IOException {
|
||||
// check if we have \r\n...
|
||||
if (ch == CR && reader.lookAhead() == LF) {
|
||||
// note: does not change ch outside of this method!
|
||||
ch = reader.read();
|
||||
// Save the EOL state
|
||||
if (firstEol == null) {
|
||||
this.firstEol = Constants.CRLF;
|
||||
}
|
||||
}
|
||||
// save EOL state here.
|
||||
if (firstEol == null) {
|
||||
if (ch == LF) {
|
||||
this.firstEol = LF_STRING;
|
||||
} else if (ch == CR) {
|
||||
this.firstEol = CR_STRING;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current character position
|
||||
*
|
||||
* @return the current character position
|
||||
*/
|
||||
long getCharacterPosition() {
|
||||
return reader.getPosition();
|
||||
return ch == LF || ch == CR;
|
||||
}
|
||||
|
||||
// TODO escape handling needs more work
|
||||
|
@ -369,93 +458,4 @@ final class Lexer implements Closeable {
|
|||
buffer.setLength(length);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Greedily accepts \n, \r and \r\n. For a \r\n pair, this check silently consumes the second control character.
|
||||
*
|
||||
* @return true if the given or next character is a line-terminator
|
||||
*/
|
||||
boolean readEndOfLine(int ch) throws IOException {
|
||||
// check if we have \r\n...
|
||||
if (ch == CR && reader.lookAhead() == LF) {
|
||||
// note: does not change ch outside of this method!
|
||||
ch = reader.read();
|
||||
// Save the EOL state
|
||||
if (firstEol == null) {
|
||||
this.firstEol = Constants.CRLF;
|
||||
}
|
||||
}
|
||||
// save EOL state here.
|
||||
if (firstEol == null) {
|
||||
if (ch == LF) {
|
||||
this.firstEol = LF_STRING;
|
||||
} else if (ch == CR) {
|
||||
this.firstEol = CR_STRING;
|
||||
}
|
||||
}
|
||||
|
||||
return ch == LF || ch == CR;
|
||||
}
|
||||
|
||||
boolean isClosed() {
|
||||
return reader.isClosed();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given char is a whitespace character
|
||||
*/
|
||||
boolean isWhitespace(final int ch) {
|
||||
return !isDelimiter(ch) && Character.isWhitespace((char) ch);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
|
||||
*
|
||||
* @param ch the character to check
|
||||
* @return true if the character is at the start of a line.
|
||||
*/
|
||||
boolean isStartOfLine(final int ch) {
|
||||
return ch == LF || ch == CR || ch == UNDEFINED;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given character indicates end of file
|
||||
*/
|
||||
boolean isEndOfFile(final int ch) {
|
||||
return ch == END_OF_STREAM;
|
||||
}
|
||||
|
||||
boolean isDelimiter(final int ch) {
|
||||
return ch == delimiter;
|
||||
}
|
||||
|
||||
boolean isEscape(final int ch) {
|
||||
return ch == escape;
|
||||
}
|
||||
|
||||
boolean isQuoteChar(final int ch) {
|
||||
return ch == quoteChar;
|
||||
}
|
||||
|
||||
boolean isCommentStart(final int ch) {
|
||||
return ch == commentStart;
|
||||
}
|
||||
|
||||
private boolean isMetaChar(final int ch) {
|
||||
return ch == delimiter ||
|
||||
ch == escape ||
|
||||
ch == quoteChar ||
|
||||
ch == commentStart;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes resources.
|
||||
*
|
||||
* @throws IOException
|
||||
* If an I/O error occurs
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,9 +26,6 @@ import static org.apache.commons.csv.Token.Type.INVALID;
|
|||
*/
|
||||
final class Token {
|
||||
|
||||
/** length of the initial token (content-)buffer */
|
||||
private static final int INITIAL_TOKEN_LENGTH = 50;
|
||||
|
||||
enum Type {
|
||||
/** Token has no valid content, i.e. is in its initialized state. */
|
||||
INVALID,
|
||||
|
@ -46,6 +43,9 @@ final class Token {
|
|||
COMMENT
|
||||
}
|
||||
|
||||
/** length of the initial token (content-)buffer */
|
||||
private static final int INITIAL_TOKEN_LENGTH = 50;
|
||||
|
||||
/** Token type */
|
||||
Token.Type type = INVALID;
|
||||
|
||||
|
|
Loading…
Reference in New Issue