Make it easy to provide an alternative lexer if required

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1303620 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sebastian Bazley 2012-03-21 23:46:44 +00:00
parent 7592782eee
commit 65ab9db952
3 changed files with 106 additions and 71 deletions

View File

@ -21,24 +21,14 @@ import java.io.IOException;
import static org.apache.commons.csv.Token.Type.*;
class CSVLexer {
class CSVLexer extends Lexer {
private final StringBuilder wsBuf = new StringBuilder();
private final CSVFormat format;
/** The input stream */
private final ExtendedBufferedReader in;
CSVLexer(CSVFormat format, ExtendedBufferedReader in) {
this.format = format;
this.in = in;
super(format, in);
}
/**
 * Reports the line number as tracked by the underlying reader.
 *
 * @return the value of {@code in.getLineNumber()}
 */
public int getLineNumber() {
    final int lineNumber = in.getLineNumber();
    return lineNumber;
}
/**
* Returns the next token.
* <p/>
@ -48,6 +38,7 @@ class CSVLexer {
* @return the next token found
* @throws java.io.IOException on stream access error
*/
@Override
Token nextToken(Token tkn) throws IOException {
wsBuf.setLength(0); // reuse
@ -182,16 +173,6 @@ class CSVLexer {
return tkn;
}
/**
 * Strips trailing whitespace (as defined by {@link Character#isWhitespace(char)})
 * from the given buffer, in place.
 *
 * @param buffer the buffer to shorten; left untouched when it does not end in whitespace
 */
private void trimTrailingSpaces(StringBuilder buffer) {
    final int originalLength = buffer.length();
    int end = originalLength;
    // walk backwards until the char just before 'end' is not whitespace
    for (; end > 0; end--) {
        if (!Character.isWhitespace(buffer.charAt(end - 1))) {
            break;
        }
    }
    if (end < originalLength) {
        buffer.setLength(end);
    }
}
/**
* An encapsulated token lexer
* <p/>
@ -253,51 +234,4 @@ class CSVLexer {
}
}
/**
 * Reads the character following an escape character and translates it.
 * <p/>
 * The letters {@code r}, {@code n}, {@code t}, {@code b} and {@code f} map to the
 * corresponding control characters; any other character is returned unchanged.
 *
 * @param c the escape character that was just read (normally a backslash); its value is not used
 * @return the translated character
 * @throws IOException on stream access error, or if the input ends directly after the escape character
 */
private int readEscape(int c) throws IOException {
    // assume c is the escape char (normally a backslash)
    c = in.read();
    if (c == ExtendedBufferedReader.END_OF_STREAM) {
        // a dangling escape at the end of the input: do not hand the
        // end-of-stream sentinel back to the caller as if it were data
        throw new IOException("EOF whilst processing escape sequence at line " + in.getLineNumber());
    }
    switch (c) {
        case 'r':
            return '\r';
        case 'n':
            return '\n';
        case 't':
            return '\t';
        case 'b':
            return '\b';
        case 'f':
            return '\f';
        default:
            // unknown escapes stand for the character itself
            return c;
    }
}
/**
 * Tests for whitespace, never treating the format's delimiter as whitespace.
 *
 * @param c the character to test
 * @return true if the given char is a whitespace character and is not the delimiter
 */
private boolean isWhitespace(int c) {
    if (c == format.getDelimiter()) {
        // the delimiter never counts as whitespace
        return false;
    }
    return Character.isWhitespace((char) c);
}
/**
 * Greedy line-terminator check: accepts \n, \r and \r\n.
 * When a \r is directly followed by \n, the \n is silently consumed from the input.
 *
 * @param c the character to test
 * @return true if the given character is a line-terminator
 * @throws IOException on stream access error
 */
private boolean isEndOfLine(int c) throws IOException {
    if (c == '\n') {
        return true;
    }
    if (c != '\r') {
        return false;
    }
    // c is \r: peek for a following \n
    if (in.lookAhead() != '\n') {
        return true;
    }
    // consume the second control character; the caller's c is unaffected
    final int consumed = in.read();
    return consumed == '\n' || consumed == '\r';
}
/**
 * Compares the given character against the reader's end-of-stream marker.
 *
 * @param c the character to test
 * @return true if the given character indicates end of file
 */
private boolean isEndOfFile(int c) {
    final boolean atEnd = (ExtendedBufferedReader.END_OF_STREAM == c);
    return atEnd;
}
}

View File

@ -62,7 +62,7 @@ import static org.apache.commons.csv.Token.Type.*;
*/
public class CSVParser implements Iterable<CSVRecord> {
private final CSVLexer lexer;
private final Lexer lexer;
private final Map<String, Integer> headerMapping;
// the following objects are shared to reduce garbage

View File

@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.commons.csv;
import java.io.IOException;
/**
 * Abstract lexer class; contains common utility routines shared by lexers.
 * <p/>
 * Concrete lexers implement {@link #nextToken(Token)} and may use the helpers
 * defined here, which read from {@link #in} and consult {@link #format}.
 */
abstract class Lexer {

    /** The format in use; consulted by {@link #isWhitespace(int)} for its delimiter */
    final CSVFormat format;

    /** The input stream */
    final ExtendedBufferedReader in;

    /**
     * Stores the format and the input for use by the helpers and by subclasses.
     *
     * @param format the format in use
     * @param in     the input to read from
     */
    Lexer(CSVFormat format, ExtendedBufferedReader in) {
        this.format = format;
        this.in = in;
    }

    /**
     * @return the line number reported by the underlying reader
     */
    int getLineNumber() {
        return in.getLineNumber();
    }

    /**
     * Reads the character following an escape character and translates it.
     * <p/>
     * The letters {@code r}, {@code n}, {@code t}, {@code b} and {@code f} map to the
     * corresponding control characters; any other character is returned unchanged.
     *
     * @param c the escape character that was just read (normally a backslash); its value is not used
     * @return the translated character
     * @throws IOException on stream access error, or if the input ends directly after the escape character
     */
    int readEscape(int c) throws IOException {
        // assume c is the escape char (normally a backslash)
        c = in.read();
        if (isEndOfFile(c)) {
            // a dangling escape at the end of the input: do not hand the
            // end-of-stream sentinel back to the caller as if it were data
            throw new IOException("EOF whilst processing escape sequence at line " + getLineNumber());
        }
        switch (c) {
            case 'r':
                return '\r';
            case 'n':
                return '\n';
            case 't':
                return '\t';
            case 'b':
                return '\b';
            case 'f':
                return '\f';
            default:
                // unknown escapes stand for the character itself
                return c;
        }
    }

    /**
     * Removes trailing whitespace (as defined by {@link Character#isWhitespace(char)})
     * from the given buffer, in place.
     *
     * @param buffer the buffer to shorten; left untouched when it does not end in whitespace
     */
    void trimTrailingSpaces(StringBuilder buffer) {
        int length = buffer.length();
        while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
            length = length - 1;
        }
        if (length != buffer.length()) {
            buffer.setLength(length);
        }
    }

    /**
     * The format's delimiter is never treated as whitespace.
     *
     * @param c the character to test
     * @return true if the given char is a whitespace character
     */
    boolean isWhitespace(int c) {
        return (c != format.getDelimiter()) && Character.isWhitespace((char) c);
    }

    /**
     * Greedy - accepts \n, \r and \r\n
     * This checker consumes silently the second control-character...
     *
     * @param c the character to test
     * @return true if the given character is a line-terminator
     * @throws IOException on stream access error
     */
    boolean isEndOfLine(int c) throws IOException {
        // check if we have \r\n...
        if (c == '\r' && in.lookAhead() == '\n') {
            // note: does not change c outside of this method !!
            c = in.read();
        }
        return (c == '\n' || c == '\r');
    }

    /**
     * @param c the character to test
     * @return true if the given character indicates end of file
     */
    boolean isEndOfFile(int c) {
        return c == ExtendedBufferedReader.END_OF_STREAM;
    }

    /**
     * Returns the next token.
     *
     * @param reusableToken a token object supplied by the caller for the implementation to use
     * @return the next token found
     * @throws IOException on stream access error
     */
    abstract Token nextToken(Token reusableToken) throws IOException;
}