Sort methods in alphabetical (AB) order.

This commit is contained in:
Gary Gregory 2020-05-24 16:08:41 -04:00
parent abd7de4283
commit 21f4f584ba
5 changed files with 260 additions and 260 deletions

View File

@ -184,6 +184,26 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
}
}
/**
* Header information based on name and position.
*/
private static final class Headers {
/**
* Header column positions (0-based)
*/
final Map<String, Integer> headerMap;
/**
* Header names in column order
*/
final List<String> headerNames;
/**
* Creates a holder for the given header data.
* <p>
* Both arguments are stored by reference; no copy is made here.
*
* @param headerMap
* maps a header name to its column position
* @param headerNames
* the header names in column order
*/
Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
this.headerMap = headerMap;
this.headerNames = headerNames;
}
}
/**
* Creates a parser for the given {@link File}.
*
@ -281,6 +301,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
return new CSVParser(reader, format);
}
// the following objects are shared to reduce garbage
/**
* Creates a parser for the given {@link String}.
*
@ -301,8 +323,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
return new CSVParser(new StringReader(string), format);
}
// the following objects are shared to reduce garbage
/**
* Creates and returns a parser for the given URL, which the caller MUST close.
*
@ -448,26 +468,6 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
new LinkedHashMap<>();
}
/**
* Header information based on name and position.
*/
private static final class Headers {
/**
* Header column positions (0-based)
*/
final Map<String, Integer> headerMap;
/**
* Header names in column order
*/
final List<String> headerNames;
/**
* Creates a holder for the given header data.
* <p>
* Both arguments are stored by reference; no copy is made here.
*
* @param headerMap
* maps a header name to its column position
* @param headerNames
* the header names in column order
*/
Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
this.headerMap = headerMap;
this.headerNames = headerNames;
}
}
/**
* Creates the name to index mapping if the format defines a header.
*

View File

@ -228,17 +228,6 @@ public final class CSVRecord implements Serializable, Iterable<String> {
return headerMap != null && headerMap.containsKey(name);
}
/**
* Checks whether a given column is mapped and has a value.
*
* @param name
* the name of the column to be retrieved.
* @return whether a given column is mapped and has a value
*/
public boolean isSet(final String name) {
    if (!isMapped(name)) {
        return false;
    }
    // The name is mapped: it has a value only when its column index lies inside this record.
    final int columnIndex = getHeaderMapRaw().get(name).intValue();
    return columnIndex < values.length;
}
/**
* Checks whether a column with given index has a value.
*
@ -250,6 +239,17 @@ public final class CSVRecord implements Serializable, Iterable<String> {
return 0 <= index && index < values.length;
}
/**
* Checks whether a given column is mapped and has a value.
*
* @param name
* the name of the column to be retrieved.
* @return whether a given column is mapped and has a value
*/
public boolean isSet(final String name) {
    if (!isMapped(name)) {
        return false;
    }
    // The name is mapped: it has a value only when its column index lies inside this record.
    final int columnIndex = getHeaderMapRaw().get(name).intValue();
    return columnIndex < values.length;
}
/**
* Returns an iterator over the values of this record.
*

View File

@ -53,15 +53,31 @@ final class ExtendedBufferedReader extends BufferedReader {
super(reader);
}
/**
* Closes the stream.
*
* @throws IOException
* If an I/O error occurs
*/
@Override
public int read() throws IOException {
final int current = super.read();
if (current == CR || current == LF && lastChar != CR) {
eolCounter++;
public void close() throws IOException {
// Set ivars before calling super close() in case close() throws an IOException.
closed = true;
lastChar = END_OF_STREAM;
super.close();
}
/**
* Returns the current line number
*
* @return the current line number
*/
long getCurrentLineNumber() {
// Check if we are at EOL or EOF or just starting
if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
return eolCounter; // counter is accurate
}
lastChar = current;
this.position++;
return lastChar;
return eolCounter + 1; // Allow for counter being incremented only at EOL
}
/**
@ -76,6 +92,47 @@ final class ExtendedBufferedReader extends BufferedReader {
return lastChar;
}
/**
* Gets the character position in the reader.
*
* @return the current position in the reader (counting characters, not bytes since this is a Reader)
*/
long getPosition() {
// Plain accessor; the single-character read() advances this counter by one on every call.
return this.position;
}
/**
* Returns the closed flag of this reader.
*
* @return the value of the {@code closed} field, which {@link #close()} sets to {@code true}
*/
public boolean isClosed() {
return closed;
}
/**
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
* still return this value. Does not affect line number or last character.
*
* @return the next character
*
* @throws IOException
* if there is an error in reading
*/
int lookAhead() throws IOException {
    // Mark the current spot so the one-character read below can be undone.
    super.mark(1);
    // Use the superclass read so this class's line, last-character and position state is untouched.
    final int peeked = super.read();
    // Rewind to the mark; the peeked character remains available to the next read.
    super.reset();
    return peeked;
}
/**
 * Reads a single character and updates the end-of-line counter, the last character and the position.
 * <p>
 * A CR always counts as an end of line; an LF counts only when it does not directly follow a CR, so a CR LF
 * pair is counted once.
 *
 * @return the value returned by the superclass read
 * @throws IOException
 *             if the superclass read fails
 */
@Override
public int read() throws IOException {
    final int next = super.read();
    final boolean loneLineFeed = next == LF && lastChar != CR;
    if (next == CR || loneLineFeed) {
        eolCounter++;
    }
    lastChar = next;
    // The position advances on every call, whatever value was read.
    this.position++;
    return next;
}
@Override
public int read(final char[] buf, final int offset, final int length) throws IOException {
if (length == 0) {
@ -131,61 +188,4 @@ final class ExtendedBufferedReader extends BufferedReader {
return line;
}
/**
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
* still return this value. Does not affect line number or last character.
*
* @return the next character
*
* @throws IOException
* if there is an error in reading
*/
int lookAhead() throws IOException {
    // Mark the current spot so the one-character read below can be undone.
    super.mark(1);
    // Use the superclass read so this class's line, last-character and position state is untouched.
    final int peeked = super.read();
    // Rewind to the mark; the peeked character remains available to the next read.
    super.reset();
    return peeked;
}
/**
* Returns the current line number
*
* @return the current line number
*/
long getCurrentLineNumber() {
    // At an end of line, at end of stream, or before anything was read, the counter needs no adjustment.
    final boolean counterIsAccurate =
        lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM;
    // Otherwise we are inside a line whose terminator has not been counted yet.
    return counterIsAccurate ? eolCounter : eolCounter + 1;
}
/**
* Gets the character position in the reader.
*
* @return the current position in the reader (counting characters, not bytes since this is a Reader)
*/
long getPosition() {
// Plain accessor; the single-character read() advances this counter by one on every call.
return this.position;
}
/**
* Returns the closed flag of this reader.
*
* @return the value of the {@code closed} field, which {@link #close()} sets to {@code true}
*/
public boolean isClosed() {
return closed;
}
/**
* Closes the stream.
*
* @throws IOException
* If an I/O error occurs
*/
@Override
public void close() throws IOException {
    // Record the closed state first so it is kept even when the superclass close() throws.
    lastChar = END_OF_STREAM;
    closed = true;
    super.close();
}
}

View File

@ -60,10 +60,6 @@ final class Lexer implements Closeable {
private final ExtendedBufferedReader reader;
private String firstEol;
/**
 * Returns the first end-of-line sequence stored in {@code firstEol}.
 *
 * @return the stored end-of-line string, or {@code null} while none has been stored
 */
String getFirstEol() {
    return this.firstEol;
}
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
this.reader = reader;
this.delimiter = format.getDelimiter();
@ -74,6 +70,94 @@ final class Lexer implements Closeable {
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
}
/**
* Closes resources.
*
* @throws IOException
* If an I/O error occurs
*/
@Override
public void close() throws IOException {
// The wrapped reader is the only resource closed here.
reader.close();
}
/**
* Returns the current character position
*
* @return the current character position
*/
long getCharacterPosition() {
// Position tracking lives in the reader; the lexer only forwards the call.
return reader.getPosition();
}
/**
* Returns the current line number
*
* @return the current line number
*/
long getCurrentLineNumber() {
// Line counting lives in the reader; the lexer only forwards the call.
return reader.getCurrentLineNumber();
}
/**
 * Returns the first end-of-line sequence stored in {@code firstEol}.
 *
 * @return the stored end-of-line string, or {@code null} while none has been stored
 */
String getFirstEol() {
    return this.firstEol;
}
/**
* Reports whether the underlying reader is closed.
*
* @return the result of the reader's own {@code isClosed()} check
*/
boolean isClosed() {
return reader.isClosed();
}
/**
* Compares the given character with this lexer's {@code commentStart} value.
*
* @param ch the character to check
* @return true if the given character equals {@code commentStart}
*/
boolean isCommentStart(final int ch) {
return ch == commentStart;
}
/**
* Compares the given character with this lexer's {@code delimiter} value.
*
* @param ch the character to check
* @return true if the given character equals {@code delimiter}
*/
boolean isDelimiter(final int ch) {
return ch == delimiter;
}
/**
* @return true if the given character indicates end of file
*/
boolean isEndOfFile(final int ch) {
// Pure comparison against the END_OF_STREAM sentinel; no reader state is consulted.
return ch == END_OF_STREAM;
}
/**
* Compares the given character with this lexer's {@code escape} value.
*
* @param ch the character to check
* @return true if the given character equals {@code escape}
*/
boolean isEscape(final int ch) {
return ch == escape;
}
/**
 * Tells whether the given character equals any of this lexer's special characters: {@code delimiter},
 * {@code escape}, {@code quoteChar} or {@code commentStart}.
 *
 * @param ch the character to check
 * @return true if the character equals one of those four values
 */
private boolean isMetaChar(final int ch) {
    if (ch == delimiter || ch == escape) {
        return true;
    }
    return ch == quoteChar || ch == commentStart;
}
/**
* Compares the given character with this lexer's {@code quoteChar} value.
*
* @param ch the character to check
* @return true if the given character equals {@code quoteChar}
*/
boolean isQuoteChar(final int ch) {
return ch == quoteChar;
}
/**
* Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
*
* @param ch the character to check
* @return true if the character is at the start of a line.
*/
boolean isStartOfLine(final int ch) {
    // UNDEFINED is treated as a line start as well as the two line-break characters.
    if (ch == UNDEFINED) {
        return true;
    }
    return ch == LF || ch == CR;
}
/**
* @return true if the given char is a whitespace character
*/
boolean isWhitespace(final int ch) {
    // A delimiter is never reported as whitespace, even when the delimiter is itself a whitespace character.
    if (isDelimiter(ch)) {
        return false;
    }
    return Character.isWhitespace((char) ch);
}
/**
 * Converts a possibly-null {@link Character} to a primitive char.
 *
 * @param c the character to convert, may be {@code null}
 * @return {@code DISABLED} when {@code c} is {@code null}, otherwise the char value of {@code c}
 */
private char mapNullToDisabled(final Character c) {
    if (c == null) {
        return DISABLED;
    }
    return c.charValue();
}
/**
* Returns the next token.
* <p>
@ -170,59 +254,6 @@ final class Lexer implements Closeable {
return token;
}
/**
* Parses a simple token.
* <p>
* Simple tokens are tokens which are not surrounded by encapsulators. A simple token might contain escaped
* delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
* <ul>
* <li>end of line has been reached (EORECORD)</li>
* <li>end of stream has been reached (EOF)</li>
* <li>an unescaped delimiter has been reached (TOKEN)</li>
* </ul>
*
* @param token
* the current token
* @param ch
* the current character
* @return the filled token
* @throws IOException
* on stream access error
*/
private Token parseSimpleToken(final Token token, int ch) throws IOException {
    // Endless loop left via break; the checks run in this order because readEndOfLine may consume input.
    for (;;) {
        if (readEndOfLine(ch)) {
            token.type = EORECORD;
            break;
        }
        if (isEndOfFile(ch)) {
            token.type = EOF;
            token.isReady = true; // There is data at EOF
            break;
        }
        if (isDelimiter(ch)) {
            token.type = TOKEN;
            break;
        }
        if (isEscape(ch)) {
            final int unescaped = readEscape();
            if (unescaped != END_OF_STREAM) {
                token.content.append((char) unescaped);
            } else {
                // unexpected char after escape: keep the escape character and the character that followed it
                token.content.append((char) ch).append((char) reader.getLastChar());
            }
        } else {
            token.content.append((char) ch);
        }
        // Both content-appending paths move on to the next character.
        ch = reader.read();
    }
    if (ignoreSurroundingSpaces) {
        trimTrailingSpaces(token.content);
    }
    return token;
}
/**
* Parses an encapsulated token.
* <p/>
@ -294,26 +325,84 @@ final class Lexer implements Closeable {
}
}
private char mapNullToDisabled(final Character c) {
return c == null ? DISABLED : c.charValue();
/**
* Parses a simple token.
* <p>
* Simple tokens are tokens which are not surrounded by encapsulators. A simple token might contain escaped
* delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
* <ul>
* <li>end of line has been reached (EORECORD)</li>
* <li>end of stream has been reached (EOF)</li>
* <li>an unescaped delimiter has been reached (TOKEN)</li>
* </ul>
*
* @param token
* the current token
* @param ch
* the current character
* @return the filled token
* @throws IOException
* on stream access error
*/
private Token parseSimpleToken(final Token token, int ch) throws IOException {
// Faster to use while(true)+break than while(token.type == INVALID)
while (true) {
if (readEndOfLine(ch)) {
token.type = EORECORD;
break;
} else if (isEndOfFile(ch)) {
token.type = EOF;
token.isReady = true; // There is data at EOF
break;
} else if (isDelimiter(ch)) {
token.type = TOKEN;
break;
} else if (isEscape(ch)) {
final int unescaped = readEscape();
if (unescaped == END_OF_STREAM) { // unexpected char after escape
token.content.append((char) ch).append((char) reader.getLastChar());
} else {
token.content.append((char) unescaped);
}
ch = reader.read(); // continue
} else {
token.content.append((char) ch);
ch = reader.read(); // continue
}
}
if (ignoreSurroundingSpaces) {
trimTrailingSpaces(token.content);
}
return token;
}
/**
* Returns the current line number
* Greedily accepts \n, \r and \r\n. For \r\n, this check silently consumes the second control character.
*
* @return the current line number
* @return true if the given or next character is a line-terminator
*/
long getCurrentLineNumber() {
return reader.getCurrentLineNumber();
}
boolean readEndOfLine(int ch) throws IOException {
// check if we have \r\n...
if (ch == CR && reader.lookAhead() == LF) {
// note: does not change ch outside of this method!
ch = reader.read();
// Save the EOL state
if (firstEol == null) {
this.firstEol = Constants.CRLF;
}
}
// save EOL state here.
if (firstEol == null) {
if (ch == LF) {
this.firstEol = LF_STRING;
} else if (ch == CR) {
this.firstEol = CR_STRING;
}
}
/**
* Returns the current character position
*
* @return the current character position
*/
long getCharacterPosition() {
return reader.getPosition();
return ch == LF || ch == CR;
}
// TODO escape handling needs more work
@ -369,93 +458,4 @@ final class Lexer implements Closeable {
buffer.setLength(length);
}
}
/**
* Greedily accepts \n, \r and \r\n. For \r\n, this check silently consumes the second control character.
*
* @return true if the given or next character is a line-terminator
*/
boolean readEndOfLine(int ch) throws IOException {
    // A CR directly followed by an LF is one terminator: consume the LF as well.
    final boolean crLfPair = ch == CR && reader.lookAhead() == LF;
    if (crLfPair) {
        // note: reassigning ch here is not visible to the caller
        ch = reader.read();
    }
    // Only the first end-of-line sequence seen is stored.
    if (firstEol == null) {
        if (crLfPair) {
            this.firstEol = Constants.CRLF;
        } else if (ch == LF) {
            this.firstEol = LF_STRING;
        } else if (ch == CR) {
            this.firstEol = CR_STRING;
        }
    }
    return ch == LF || ch == CR;
}
/**
* Reports whether the underlying reader is closed.
*
* @return the result of the reader's own {@code isClosed()} check
*/
boolean isClosed() {
return reader.isClosed();
}
/**
* @return true if the given char is a whitespace character
*/
boolean isWhitespace(final int ch) {
    // A delimiter is never reported as whitespace, even when the delimiter is itself a whitespace character.
    if (isDelimiter(ch)) {
        return false;
    }
    return Character.isWhitespace((char) ch);
}
/**
* Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
*
* @param ch the character to check
* @return true if the character is at the start of a line.
*/
boolean isStartOfLine(final int ch) {
    // UNDEFINED is treated as a line start as well as the two line-break characters.
    if (ch == UNDEFINED) {
        return true;
    }
    return ch == LF || ch == CR;
}
/**
* @return true if the given character indicates end of file
*/
boolean isEndOfFile(final int ch) {
// Pure comparison against the END_OF_STREAM sentinel; no reader state is consulted.
return ch == END_OF_STREAM;
}
/**
* Compares the given character with this lexer's {@code delimiter} value.
*
* @param ch the character to check
* @return true if the given character equals {@code delimiter}
*/
boolean isDelimiter(final int ch) {
return ch == delimiter;
}
/**
* Compares the given character with this lexer's {@code escape} value.
*
* @param ch the character to check
* @return true if the given character equals {@code escape}
*/
boolean isEscape(final int ch) {
return ch == escape;
}
/**
* Compares the given character with this lexer's {@code quoteChar} value.
*
* @param ch the character to check
* @return true if the given character equals {@code quoteChar}
*/
boolean isQuoteChar(final int ch) {
return ch == quoteChar;
}
/**
* Compares the given character with this lexer's {@code commentStart} value.
*
* @param ch the character to check
* @return true if the given character equals {@code commentStart}
*/
boolean isCommentStart(final int ch) {
return ch == commentStart;
}
/**
 * Tells whether the given character equals any of this lexer's special characters: {@code delimiter},
 * {@code escape}, {@code quoteChar} or {@code commentStart}.
 *
 * @param ch the character to check
 * @return true if the character equals one of those four values
 */
private boolean isMetaChar(final int ch) {
    if (ch == delimiter || ch == escape) {
        return true;
    }
    return ch == quoteChar || ch == commentStart;
}
/**
* Closes resources.
*
* @throws IOException
* If an I/O error occurs
*/
@Override
public void close() throws IOException {
// The wrapped reader is the only resource closed here.
reader.close();
}
}

View File

@ -26,9 +26,6 @@ import static org.apache.commons.csv.Token.Type.INVALID;
*/
final class Token {
/** length of the initial token (content-)buffer */
private static final int INITIAL_TOKEN_LENGTH = 50;
enum Type {
/** Token has no valid content, i.e. is in its initialized state. */
INVALID,
@ -46,6 +43,9 @@ final class Token {
COMMENT
}
/** length of the initial token (content-)buffer */
private static final int INITIAL_TOKEN_LENGTH = 50;
/** Token type */
Token.Type type = INVALID;