Make it easy to provide an alternative lexer if required
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1303620 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7592782eee
commit
65ab9db952
|
@ -21,24 +21,14 @@ import java.io.IOException;
|
|||
|
||||
import static org.apache.commons.csv.Token.Type.*;
|
||||
|
||||
class CSVLexer {
|
||||
class CSVLexer extends Lexer {
|
||||
|
||||
private final StringBuilder wsBuf = new StringBuilder();
|
||||
|
||||
private final CSVFormat format;
|
||||
|
||||
/** The input stream */
|
||||
private final ExtendedBufferedReader in;
|
||||
|
||||
CSVLexer(CSVFormat format, ExtendedBufferedReader in) {
|
||||
this.format = format;
|
||||
this.in = in;
|
||||
super(format, in);
|
||||
}
|
||||
|
||||
public int getLineNumber() {
|
||||
return in.getLineNumber();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the next token.
|
||||
* <p/>
|
||||
|
@ -48,6 +38,7 @@ class CSVLexer {
|
|||
* @return the next token found
|
||||
* @throws java.io.IOException on stream access error
|
||||
*/
|
||||
@Override
|
||||
Token nextToken(Token tkn) throws IOException {
|
||||
wsBuf.setLength(0); // reuse
|
||||
|
||||
|
@ -182,16 +173,6 @@ class CSVLexer {
|
|||
return tkn;
|
||||
}
|
||||
|
||||
private void trimTrailingSpaces(StringBuilder buffer) {
|
||||
int length = buffer.length();
|
||||
while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
|
||||
length = length - 1;
|
||||
}
|
||||
if (length != buffer.length()) {
|
||||
buffer.setLength(length);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An encapsulated token lexer
|
||||
* <p/>
|
||||
|
@ -253,51 +234,4 @@ class CSVLexer {
|
|||
}
|
||||
}
|
||||
|
||||
private int readEscape(int c) throws IOException {
|
||||
// assume c is the escape char (normally a backslash)
|
||||
c = in.read();
|
||||
switch (c) {
|
||||
case 'r':
|
||||
return '\r';
|
||||
case 'n':
|
||||
return '\n';
|
||||
case 't':
|
||||
return '\t';
|
||||
case 'b':
|
||||
return '\b';
|
||||
case 'f':
|
||||
return '\f';
|
||||
default:
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given char is a whitespace character
|
||||
*/
|
||||
private boolean isWhitespace(int c) {
|
||||
return (c != format.getDelimiter()) && Character.isWhitespace((char) c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Greedy - accepts \n, \r and \r\n
|
||||
* This checker consumes silently the second control-character...
|
||||
*
|
||||
* @return true if the given character is a line-terminator
|
||||
*/
|
||||
private boolean isEndOfLine(int c) throws IOException {
|
||||
// check if we have \r\n...
|
||||
if (c == '\r' && in.lookAhead() == '\n') {
|
||||
// note: does not change c outside of this method !!
|
||||
c = in.read();
|
||||
}
|
||||
return (c == '\n' || c == '\r');
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given character indicates end of file
|
||||
*/
|
||||
private boolean isEndOfFile(int c) {
|
||||
return c == ExtendedBufferedReader.END_OF_STREAM;
|
||||
}
|
||||
}
|
|
@ -62,7 +62,7 @@ import static org.apache.commons.csv.Token.Type.*;
|
|||
*/
|
||||
public class CSVParser implements Iterable<CSVRecord> {
|
||||
|
||||
private final CSVLexer lexer;
|
||||
private final Lexer lexer;
|
||||
private final Map<String, Integer> headerMapping;
|
||||
|
||||
// the following objects are shared to reduce garbage
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Abstract lexer class; contains common utility routines shared by lexers
|
||||
*/
|
||||
abstract class Lexer {
|
||||
|
||||
final CSVFormat format;
|
||||
|
||||
/** The input stream */
|
||||
final ExtendedBufferedReader in;
|
||||
|
||||
Lexer(CSVFormat format, ExtendedBufferedReader in) {
|
||||
this.format = format;
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
int getLineNumber() {
|
||||
return in.getLineNumber();
|
||||
}
|
||||
|
||||
int readEscape(int c) throws IOException {
|
||||
// assume c is the escape char (normally a backslash)
|
||||
c = in.read();
|
||||
switch (c) {
|
||||
case 'r':
|
||||
return '\r';
|
||||
case 'n':
|
||||
return '\n';
|
||||
case 't':
|
||||
return '\t';
|
||||
case 'b':
|
||||
return '\b';
|
||||
case 'f':
|
||||
return '\f';
|
||||
default:
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
void trimTrailingSpaces(StringBuilder buffer) {
|
||||
int length = buffer.length();
|
||||
while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
|
||||
length = length - 1;
|
||||
}
|
||||
if (length != buffer.length()) {
|
||||
buffer.setLength(length);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given char is a whitespace character
|
||||
*/
|
||||
boolean isWhitespace(int c) {
|
||||
return (c != format.getDelimiter()) && Character.isWhitespace((char) c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Greedy - accepts \n, \r and \r\n
|
||||
* This checker consumes silently the second control-character...
|
||||
*
|
||||
* @return true if the given character is a line-terminator
|
||||
*/
|
||||
boolean isEndOfLine(int c) throws IOException {
|
||||
// check if we have \r\n...
|
||||
if (c == '\r' && in.lookAhead() == '\n') {
|
||||
// note: does not change c outside of this method !!
|
||||
c = in.read();
|
||||
}
|
||||
return (c == '\n' || c == '\r');
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the given character indicates end of file
|
||||
*/
|
||||
boolean isEndOfFile(int c) {
|
||||
return c == ExtendedBufferedReader.END_OF_STREAM;
|
||||
}
|
||||
|
||||
abstract Token nextToken(Token reusableToken) throws IOException;
|
||||
}
|
Loading…
Reference in New Issue