Moved the lexer in a separate file
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1300850 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
35b954ed36
commit
38670dbe92
|
@ -0,0 +1,344 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.apache.commons.csv.CSVLexer.Token.Type.*;
|
||||
|
||||
class CSVLexer {
|
||||
|
||||
/** length of the initial token (content-)buffer */
|
||||
private static final int INITIAL_TOKEN_LENGTH = 50;
|
||||
|
||||
private final StringBuilder wsBuf = new StringBuilder();
|
||||
|
||||
private final CSVFormat format;
|
||||
|
||||
/** The input stream */
|
||||
private final ExtendedBufferedReader in;
|
||||
|
||||
/**
|
||||
* Token is an internal token representation.
|
||||
* <p/>
|
||||
* It is used as contract between the lexer and the parser.
|
||||
*/
|
||||
static class Token {
|
||||
|
||||
enum Type {
|
||||
/** Token has no valid content, i.e. is in its initialized state. */
|
||||
INVALID,
|
||||
|
||||
/** Token with content, at beginning or in the middle of a line. */
|
||||
TOKEN,
|
||||
|
||||
/** Token (which can have content) when end of file is reached. */
|
||||
EOF,
|
||||
|
||||
/** Token with content when end of a line is reached. */
|
||||
EORECORD
|
||||
}
|
||||
|
||||
/** Token type */
|
||||
Type type = INVALID;
|
||||
|
||||
/** The content buffer. */
|
||||
StringBuilder content = new StringBuilder(INITIAL_TOKEN_LENGTH);
|
||||
|
||||
/** Token ready flag: indicates a valid token with content (ready for the parser). */
|
||||
boolean isReady;
|
||||
|
||||
Token reset() {
|
||||
content.setLength(0);
|
||||
type = INVALID;
|
||||
isReady = false;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
CSVLexer(CSVFormat format, ExtendedBufferedReader in) {
|
||||
this.format = format;
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
public int getLineNumber() {
|
||||
return in.getLineNumber();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next token.
|
||||
* <p/>
|
||||
* A token corresponds to a term, a record change or an end-of-file indicator.
|
||||
*
|
||||
* @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token.
|
||||
* @return the next token found
|
||||
* @throws java.io.IOException on stream access error
|
||||
*/
|
||||
Token nextToken(Token tkn) throws IOException {
|
||||
wsBuf.setLength(0); // reuse
|
||||
|
||||
// get the last read char (required for empty line detection)
|
||||
int lastChar = in.readAgain();
|
||||
|
||||
// read the next char and set eol
|
||||
/* note: unfortunately isEndOfLine may consumes a character silently.
|
||||
* this has no effect outside of the method. so a simple workaround
|
||||
* is to call 'readAgain' on the stream...
|
||||
*/
|
||||
int c = in.read();
|
||||
boolean eol = isEndOfLine(c);
|
||||
c = in.readAgain();
|
||||
|
||||
// empty line detection: eol AND (last char was EOL or beginning)
|
||||
if (format.isEmptyLinesIgnored()) {
|
||||
while (eol
|
||||
&& (lastChar == '\n' || lastChar == '\r' || lastChar == ExtendedBufferedReader.UNDEFINED)
|
||||
&& !isEndOfFile(lastChar)) {
|
||||
// go on char ahead ...
|
||||
lastChar = c;
|
||||
c = in.read();
|
||||
eol = isEndOfLine(c);
|
||||
c = in.readAgain();
|
||||
// reached end of file without any content (empty line at the end)
|
||||
if (isEndOfFile(c)) {
|
||||
tkn.type = EOF;
|
||||
return tkn;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// did we reach eof during the last iteration already ? EOF
|
||||
if (isEndOfFile(lastChar) || (lastChar != format.getDelimiter() && isEndOfFile(c))) {
|
||||
tkn.type = EOF;
|
||||
return tkn;
|
||||
}
|
||||
|
||||
// important: make sure a new char gets consumed in each iteration
|
||||
while (!tkn.isReady && tkn.type != EOF) {
|
||||
// ignore whitespaces at beginning of a token
|
||||
if (format.isLeadingSpacesIgnored()) {
|
||||
while (isWhitespace(c) && !eol) {
|
||||
wsBuf.append((char) c);
|
||||
c = in.read();
|
||||
eol = isEndOfLine(c);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, start of token reached: comment, encapsulated, or token
|
||||
if (c == format.getCommentStart()) {
|
||||
// ignore everything till end of line and continue (incr linecount)
|
||||
in.readLine();
|
||||
tkn = nextToken(tkn.reset());
|
||||
} else if (c == format.getDelimiter()) {
|
||||
// empty token return TOKEN("")
|
||||
tkn.type = TOKEN;
|
||||
tkn.isReady = true;
|
||||
} else if (eol) {
|
||||
// empty token return EORECORD("")
|
||||
//noop: tkn.content.append("");
|
||||
tkn.type = EORECORD;
|
||||
tkn.isReady = true;
|
||||
} else if (c == format.getEncapsulator()) {
|
||||
// consume encapsulated token
|
||||
encapsulatedTokenLexer(tkn, c);
|
||||
} else if (isEndOfFile(c)) {
|
||||
// end of file return EOF()
|
||||
//noop: tkn.content.append("");
|
||||
tkn.type = EOF;
|
||||
tkn.isReady = true;
|
||||
} else {
|
||||
// next token must be a simple token
|
||||
// add removed blanks when not ignoring whitespace chars...
|
||||
if (!format.isLeadingSpacesIgnored()) {
|
||||
tkn.content.append(wsBuf);
|
||||
}
|
||||
simpleTokenLexer(tkn, c);
|
||||
}
|
||||
}
|
||||
return tkn;
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple token lexer
|
||||
* <p/>
|
||||
* Simple token are tokens which are not surrounded by encapsulators.
|
||||
* A simple token might contain escaped delimiters (as \, or \;). The
|
||||
* token is finished when one of the following conditions become true:
|
||||
* <ul>
|
||||
* <li>end of line has been reached (EORECORD)</li>
|
||||
* <li>end of stream has been reached (EOF)</li>
|
||||
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param tkn the current token
|
||||
* @param c the current character
|
||||
* @return the filled token
|
||||
* @throws IOException on stream access error
|
||||
*/
|
||||
private Token simpleTokenLexer(Token tkn, int c) throws IOException {
|
||||
while (true) {
|
||||
if (isEndOfLine(c)) {
|
||||
// end of record
|
||||
tkn.type = EORECORD;
|
||||
tkn.isReady = true;
|
||||
break;
|
||||
} else if (isEndOfFile(c)) {
|
||||
// end of file
|
||||
tkn.type = EOF;
|
||||
tkn.isReady = true;
|
||||
break;
|
||||
} else if (c == format.getDelimiter()) {
|
||||
// end of token
|
||||
tkn.type = TOKEN;
|
||||
tkn.isReady = true;
|
||||
break;
|
||||
} else if (c == format.getEscape()) {
|
||||
tkn.content.append((char) readEscape(c));
|
||||
} else {
|
||||
tkn.content.append((char) c);
|
||||
}
|
||||
|
||||
c = in.read();
|
||||
}
|
||||
|
||||
if (format.isTrailingSpacesIgnored()) {
|
||||
trimTrailingSpaces(tkn.content);
|
||||
}
|
||||
|
||||
return tkn;
|
||||
}
|
||||
|
||||
private void trimTrailingSpaces(StringBuilder buffer) {
|
||||
int length = buffer.length();
|
||||
while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
|
||||
length = length - 1;
|
||||
}
|
||||
if (length != buffer.length()) {
|
||||
buffer.setLength(length);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An encapsulated token lexer
|
||||
* <p/>
|
||||
* Encapsulated tokens are surrounded by the given encapsulating-string.
|
||||
* The encapsulator itself might be included in the token using a
|
||||
* doubling syntax (as "", '') or using escaping (as in \", \').
|
||||
* Whitespaces before and after an encapsulated token are ignored.
|
||||
*
|
||||
* @param tkn the current token
|
||||
* @param c the current character
|
||||
* @return a valid token object
|
||||
* @throws IOException on invalid state
|
||||
*/
|
||||
private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException {
|
||||
// save current line
|
||||
int startLineNumber = getLineNumber();
|
||||
// ignore the given delimiter
|
||||
// assert c == delimiter;
|
||||
while (true) {
|
||||
c = in.read();
|
||||
|
||||
if (c == format.getEscape()) {
|
||||
tkn.content.append((char) readEscape(c));
|
||||
} else if (c == format.getEncapsulator()) {
|
||||
if (in.lookAhead() == format.getEncapsulator()) {
|
||||
// double or escaped encapsulator -> add single encapsulator to token
|
||||
c = in.read();
|
||||
tkn.content.append((char) c);
|
||||
} else {
|
||||
// token finish mark (encapsulator) reached: ignore whitespace till delimiter
|
||||
while (true) {
|
||||
c = in.read();
|
||||
if (c == format.getDelimiter()) {
|
||||
tkn.type = TOKEN;
|
||||
tkn.isReady = true;
|
||||
return tkn;
|
||||
} else if (isEndOfFile(c)) {
|
||||
tkn.type = EOF;
|
||||
tkn.isReady = true;
|
||||
return tkn;
|
||||
} else if (isEndOfLine(c)) {
|
||||
// ok eo token reached
|
||||
tkn.type = EORECORD;
|
||||
tkn.isReady = true;
|
||||
return tkn;
|
||||
} else if (!isWhitespace(c)) {
|
||||
// error invalid char between token and next delimiter
|
||||
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (isEndOfFile(c)) {
|
||||
// error condition (end of file before end of token)
|
||||
throw new IOException("(startline " + startLineNumber + ") EOF reached before encapsulated token finished");
|
||||
} else {
|
||||
// consume character
|
||||
tkn.content.append((char) c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int readEscape(int c) throws IOException {
|
||||
// assume c is the escape char (normally a backslash)
|
||||
c = in.read();
|
||||
switch (c) {
|
||||
case 'r':
|
||||
return '\r';
|
||||
case 'n':
|
||||
return '\n';
|
||||
case 't':
|
||||
return '\t';
|
||||
case 'b':
|
||||
return '\b';
|
||||
case 'f':
|
||||
return '\f';
|
||||
default:
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given char is a whitespace character
|
||||
*/
|
||||
private boolean isWhitespace(int c) {
|
||||
return (c != format.getDelimiter()) && Character.isWhitespace((char) c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Greedy - accepts \n, \r and \r\n
|
||||
* This checker consumes silently the second control-character...
|
||||
*
|
||||
* @return true if the given character is a line-terminator
|
||||
*/
|
||||
private boolean isEndOfLine(int c) throws IOException {
|
||||
// check if we have \r\n...
|
||||
if (c == '\r' && in.lookAhead() == '\n') {
|
||||
// note: does not change c outside of this method !!
|
||||
c = in.read();
|
||||
}
|
||||
return (c == '\n' || c == '\r');
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given character indicates end of file
|
||||
*/
|
||||
private boolean isEndOfFile(int c) {
|
||||
return c == ExtendedBufferedReader.END_OF_STREAM;
|
||||
}
|
||||
}
|
|
@ -230,326 +230,3 @@ public class CSVParser implements Iterable<String[]> {
|
|||
return lexer.getLineNumber();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class CSVLexer {
|
||||
|
||||
/** length of the initial token (content-)buffer */
|
||||
private static final int INITIAL_TOKEN_LENGTH = 50;
|
||||
|
||||
private final StringBuilder wsBuf = new StringBuilder();
|
||||
|
||||
private final CSVFormat format;
|
||||
|
||||
/** The input stream */
|
||||
private final ExtendedBufferedReader in;
|
||||
|
||||
/**
|
||||
* Token is an internal token representation.
|
||||
* <p/>
|
||||
* It is used as contract between the lexer and the parser.
|
||||
*/
|
||||
static class Token {
|
||||
|
||||
enum Type {
|
||||
/** Token has no valid content, i.e. is in its initialized state. */
|
||||
INVALID,
|
||||
|
||||
/** Token with content, at beginning or in the middle of a line. */
|
||||
TOKEN,
|
||||
|
||||
/** Token (which can have content) when end of file is reached. */
|
||||
EOF,
|
||||
|
||||
/** Token with content when end of a line is reached. */
|
||||
EORECORD
|
||||
}
|
||||
|
||||
/** Token type */
|
||||
Type type = INVALID;
|
||||
|
||||
/** The content buffer. */
|
||||
StringBuilder content = new StringBuilder(INITIAL_TOKEN_LENGTH);
|
||||
|
||||
/** Token ready flag: indicates a valid token with content (ready for the parser). */
|
||||
boolean isReady;
|
||||
|
||||
Token reset() {
|
||||
content.setLength(0);
|
||||
type = INVALID;
|
||||
isReady = false;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
CSVLexer(CSVFormat format, ExtendedBufferedReader in) {
|
||||
this.format = format;
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
public int getLineNumber() {
|
||||
return in.getLineNumber();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next token.
|
||||
* <p/>
|
||||
* A token corresponds to a term, a record change or an end-of-file indicator.
|
||||
*
|
||||
* @param tkn an existing Token object to reuse. The caller is responsible to initialize the Token.
|
||||
* @return the next token found
|
||||
* @throws IOException on stream access error
|
||||
*/
|
||||
Token nextToken(Token tkn) throws IOException {
|
||||
wsBuf.setLength(0); // reuse
|
||||
|
||||
// get the last read char (required for empty line detection)
|
||||
int lastChar = in.readAgain();
|
||||
|
||||
// read the next char and set eol
|
||||
/* note: unfortunately isEndOfLine may consumes a character silently.
|
||||
* this has no effect outside of the method. so a simple workaround
|
||||
* is to call 'readAgain' on the stream...
|
||||
*/
|
||||
int c = in.read();
|
||||
boolean eol = isEndOfLine(c);
|
||||
c = in.readAgain();
|
||||
|
||||
// empty line detection: eol AND (last char was EOL or beginning)
|
||||
if (format.isEmptyLinesIgnored()) {
|
||||
while (eol
|
||||
&& (lastChar == '\n' || lastChar == '\r' || lastChar == ExtendedBufferedReader.UNDEFINED)
|
||||
&& !isEndOfFile(lastChar)) {
|
||||
// go on char ahead ...
|
||||
lastChar = c;
|
||||
c = in.read();
|
||||
eol = isEndOfLine(c);
|
||||
c = in.readAgain();
|
||||
// reached end of file without any content (empty line at the end)
|
||||
if (isEndOfFile(c)) {
|
||||
tkn.type = EOF;
|
||||
return tkn;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// did we reach eof during the last iteration already ? EOF
|
||||
if (isEndOfFile(lastChar) || (lastChar != format.getDelimiter() && isEndOfFile(c))) {
|
||||
tkn.type = EOF;
|
||||
return tkn;
|
||||
}
|
||||
|
||||
// important: make sure a new char gets consumed in each iteration
|
||||
while (!tkn.isReady && tkn.type != EOF) {
|
||||
// ignore whitespaces at beginning of a token
|
||||
if (format.isLeadingSpacesIgnored()) {
|
||||
while (isWhitespace(c) && !eol) {
|
||||
wsBuf.append((char) c);
|
||||
c = in.read();
|
||||
eol = isEndOfLine(c);
|
||||
}
|
||||
}
|
||||
|
||||
// ok, start of token reached: comment, encapsulated, or token
|
||||
if (c == format.getCommentStart()) {
|
||||
// ignore everything till end of line and continue (incr linecount)
|
||||
in.readLine();
|
||||
tkn = nextToken(tkn.reset());
|
||||
} else if (c == format.getDelimiter()) {
|
||||
// empty token return TOKEN("")
|
||||
tkn.type = TOKEN;
|
||||
tkn.isReady = true;
|
||||
} else if (eol) {
|
||||
// empty token return EORECORD("")
|
||||
//noop: tkn.content.append("");
|
||||
tkn.type = EORECORD;
|
||||
tkn.isReady = true;
|
||||
} else if (c == format.getEncapsulator()) {
|
||||
// consume encapsulated token
|
||||
encapsulatedTokenLexer(tkn, c);
|
||||
} else if (isEndOfFile(c)) {
|
||||
// end of file return EOF()
|
||||
//noop: tkn.content.append("");
|
||||
tkn.type = EOF;
|
||||
tkn.isReady = true;
|
||||
} else {
|
||||
// next token must be a simple token
|
||||
// add removed blanks when not ignoring whitespace chars...
|
||||
if (!format.isLeadingSpacesIgnored()) {
|
||||
tkn.content.append(wsBuf);
|
||||
}
|
||||
simpleTokenLexer(tkn, c);
|
||||
}
|
||||
}
|
||||
return tkn;
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple token lexer
|
||||
* <p/>
|
||||
* Simple token are tokens which are not surrounded by encapsulators.
|
||||
* A simple token might contain escaped delimiters (as \, or \;). The
|
||||
* token is finished when one of the following conditions become true:
|
||||
* <ul>
|
||||
* <li>end of line has been reached (EORECORD)</li>
|
||||
* <li>end of stream has been reached (EOF)</li>
|
||||
* <li>an unescaped delimiter has been reached (TOKEN)</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param tkn the current token
|
||||
* @param c the current character
|
||||
* @return the filled token
|
||||
* @throws IOException on stream access error
|
||||
*/
|
||||
private Token simpleTokenLexer(Token tkn, int c) throws IOException {
|
||||
while (true) {
|
||||
if (isEndOfLine(c)) {
|
||||
// end of record
|
||||
tkn.type = EORECORD;
|
||||
tkn.isReady = true;
|
||||
break;
|
||||
} else if (isEndOfFile(c)) {
|
||||
// end of file
|
||||
tkn.type = EOF;
|
||||
tkn.isReady = true;
|
||||
break;
|
||||
} else if (c == format.getDelimiter()) {
|
||||
// end of token
|
||||
tkn.type = TOKEN;
|
||||
tkn.isReady = true;
|
||||
break;
|
||||
} else if (c == format.getEscape()) {
|
||||
tkn.content.append((char) readEscape(c));
|
||||
} else {
|
||||
tkn.content.append((char) c);
|
||||
}
|
||||
|
||||
c = in.read();
|
||||
}
|
||||
|
||||
if (format.isTrailingSpacesIgnored()) {
|
||||
trimTrailingSpaces(tkn.content);
|
||||
}
|
||||
|
||||
return tkn;
|
||||
}
|
||||
|
||||
private void trimTrailingSpaces(StringBuilder buffer) {
|
||||
int length = buffer.length();
|
||||
while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
|
||||
length = length - 1;
|
||||
}
|
||||
if (length != buffer.length()) {
|
||||
buffer.setLength(length);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An encapsulated token lexer
|
||||
* <p/>
|
||||
* Encapsulated tokens are surrounded by the given encapsulating-string.
|
||||
* The encapsulator itself might be included in the token using a
|
||||
* doubling syntax (as "", '') or using escaping (as in \", \').
|
||||
* Whitespaces before and after an encapsulated token are ignored.
|
||||
*
|
||||
* @param tkn the current token
|
||||
* @param c the current character
|
||||
* @return a valid token object
|
||||
* @throws IOException on invalid state
|
||||
*/
|
||||
private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException {
|
||||
// save current line
|
||||
int startLineNumber = getLineNumber();
|
||||
// ignore the given delimiter
|
||||
// assert c == delimiter;
|
||||
while (true) {
|
||||
c = in.read();
|
||||
|
||||
if (c == format.getEscape()) {
|
||||
tkn.content.append((char) readEscape(c));
|
||||
} else if (c == format.getEncapsulator()) {
|
||||
if (in.lookAhead() == format.getEncapsulator()) {
|
||||
// double or escaped encapsulator -> add single encapsulator to token
|
||||
c = in.read();
|
||||
tkn.content.append((char) c);
|
||||
} else {
|
||||
// token finish mark (encapsulator) reached: ignore whitespace till delimiter
|
||||
while (true) {
|
||||
c = in.read();
|
||||
if (c == format.getDelimiter()) {
|
||||
tkn.type = TOKEN;
|
||||
tkn.isReady = true;
|
||||
return tkn;
|
||||
} else if (isEndOfFile(c)) {
|
||||
tkn.type = EOF;
|
||||
tkn.isReady = true;
|
||||
return tkn;
|
||||
} else if (isEndOfLine(c)) {
|
||||
// ok eo token reached
|
||||
tkn.type = EORECORD;
|
||||
tkn.isReady = true;
|
||||
return tkn;
|
||||
} else if (!isWhitespace(c)) {
|
||||
// error invalid char between token and next delimiter
|
||||
throw new IOException("(line " + getLineNumber() + ") invalid char between encapsulated token and delimiter");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (isEndOfFile(c)) {
|
||||
// error condition (end of file before end of token)
|
||||
throw new IOException("(startline " + startLineNumber + ") EOF reached before encapsulated token finished");
|
||||
} else {
|
||||
// consume character
|
||||
tkn.content.append((char) c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int readEscape(int c) throws IOException {
|
||||
// assume c is the escape char (normally a backslash)
|
||||
c = in.read();
|
||||
switch (c) {
|
||||
case 'r':
|
||||
return '\r';
|
||||
case 'n':
|
||||
return '\n';
|
||||
case 't':
|
||||
return '\t';
|
||||
case 'b':
|
||||
return '\b';
|
||||
case 'f':
|
||||
return '\f';
|
||||
default:
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given char is a whitespace character
|
||||
*/
|
||||
private boolean isWhitespace(int c) {
|
||||
return (c != format.getDelimiter()) && Character.isWhitespace((char) c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Greedy - accepts \n, \r and \r\n
|
||||
* This checker consumes silently the second control-character...
|
||||
*
|
||||
* @return true if the given character is a line-terminator
|
||||
*/
|
||||
private boolean isEndOfLine(int c) throws IOException {
|
||||
// check if we have \r\n...
|
||||
if (c == '\r' && in.lookAhead() == '\n') {
|
||||
// note: does not change c outside of this method !!
|
||||
c = in.read();
|
||||
}
|
||||
return (c == '\n' || c == '\r');
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given character indicates end of file
|
||||
*/
|
||||
private boolean isEndOfFile(int c) {
|
||||
return c == ExtendedBufferedReader.END_OF_STREAM;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue