Make it easy to provide an alternative lexer if required

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1303620 13f79535-47bb-0310-9956-ffa450edef68
2012-03-21 23:46:44 +00:00 · 2012-03-21 23:46:44 +00:00 · 65ab9db952
parent 7592782eee
commit 65ab9db952
3 changed files with 106 additions and 71 deletions
--- a/src/main/java/org/apache/commons/csv/CSVLexer.java
+++ b/src/main/java/org/apache/commons/csv/CSVLexer.java
@ -21,24 +21,14 @@ import java.io.IOException;

 import static org.apache.commons.csv.Token.Type.*;

-class CSVLexer {
+class CSVLexer extends Lexer {

    private final StringBuilder wsBuf = new StringBuilder();
    
-    private final CSVFormat format;
-    
-    /** The input stream */
-    private final ExtendedBufferedReader in;
-
    CSVLexer(CSVFormat format, ExtendedBufferedReader in) {
-        this.format = format;
-        this.in = in;
+        super(format, in);
    }
-
-    public int getLineNumber() {
-        return in.getLineNumber();
-    }
-
+    
    /**
     * Returns the next token.
     * <p/>
@ -48,6 +38,7 @@ class CSVLexer {
     * @return the next token found
     * @throws java.io.IOException on stream access error
     */
+    @Override
    Token nextToken(Token tkn) throws IOException {
        wsBuf.setLength(0); // reuse

@ -182,16 +173,6 @@ class CSVLexer {
        return tkn;
    }

-    private void trimTrailingSpaces(StringBuilder buffer) {
-        int length = buffer.length();
-        while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
-            length = length - 1;
-        }
-        if (length != buffer.length()) {
-            buffer.setLength(length);
-        }
-    }
-
    /**
     * An encapsulated token lexer
     * <p/>
@ -253,51 +234,4 @@ class CSVLexer {
        }
    }

-    private int readEscape(int c) throws IOException {
-        // assume c is the escape char (normally a backslash)
-        c = in.read();
-        switch (c) {
-            case 'r':
-                return '\r';
-            case 'n':
-                return '\n';
-            case 't':
-                return '\t';
-            case 'b':
-                return '\b';
-            case 'f':
-                return '\f';
-            default:
-                return c;
-        }
-    }
-
-    /**
-     * @return true if the given char is a whitespace character
-     */
-    private boolean isWhitespace(int c) {
-        return (c != format.getDelimiter()) && Character.isWhitespace((char) c);
-    }
-
-    /**
-     * Greedy - accepts \n, \r and \r\n
-     * This checker consumes silently the second control-character...
-     *
-     * @return true if the given character is a line-terminator
-     */
-    private boolean isEndOfLine(int c) throws IOException {
-        // check if we have \r\n...
-        if (c == '\r' && in.lookAhead() == '\n') {
-            // note: does not change c outside of this method !!
-            c = in.read();
-        }
-        return (c == '\n' || c == '\r');
-    }
-
-    /**
-     * @return true if the given character indicates end of file
-     */
-    private boolean isEndOfFile(int c) {
-        return c == ExtendedBufferedReader.END_OF_STREAM;
-    }
 }
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@ -62,7 +62,7 @@ import static org.apache.commons.csv.Token.Type.*;
 */
 public class CSVParser implements Iterable<CSVRecord> {

-    private final CSVLexer lexer;
+    private final Lexer lexer;
    private final Map<String, Integer> headerMapping;

    // the following objects are shared to reduce garbage
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * 
+ */
+
+package org.apache.commons.csv;
+
+import java.io.IOException;
+
+/**
+ * Abstract base class for lexers: holds the format and the input reader, and provides utility routines shared by lexers; subclasses implement nextToken.
+ */
+abstract class Lexer {
+
+    final CSVFormat format;
+    
+    /** The input stream; all helper methods below read from it */
+    final ExtendedBufferedReader in;
+
+    Lexer(CSVFormat format, ExtendedBufferedReader in) {
+        this.format = format;
+        this.in = in;
+    }
+
+    int getLineNumber() {
+        return in.getLineNumber();
+    }
+
+    int readEscape(int c) throws IOException {
+        // c is assumed to be the escape char (normally a backslash); it is overwritten by the next char read, which is then translated
+        c = in.read();
+        switch (c) {
+            case 'r':
+                return '\r';
+            case 'n':
+                return '\n';
+            case 't':
+                return '\t';
+            case 'b':
+                return '\b';
+            case 'f':
+                return '\f';
+            default:
+                return c;
+        }
+    }
+
+    void trimTrailingSpaces(StringBuilder buffer) {
+        int length = buffer.length();
+        while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
+            length = length - 1;
+        }
+        if (length != buffer.length()) {
+            buffer.setLength(length);
+        }
+    }
+
+    /**
+     * @return true if the given char is whitespace (per {@link Character#isWhitespace(char)}) and is not the format's delimiter
+     */
+    boolean isWhitespace(int c) {
+        return (c != format.getDelimiter()) && Character.isWhitespace((char) c);
+    }
+
+    /**
+     * Greedy - accepts \n, \r and \r\n.
+     * When given \r and the look-ahead character is \n, this check silently consumes that second control character.
+     *
+     * @return true if the given character is a line terminator (\n or \r)
+     */
+    boolean isEndOfLine(int c) throws IOException {
+        // check if we have \r\n...
+        if (c == '\r' && in.lookAhead() == '\n') {
+            // note: c is a by-value parameter, so the caller's variable is not changed by this read
+            c = in.read();
+        }
+        return (c == '\n' || c == '\r');
+    }
+
+    /**
+     * @return true if the given character equals ExtendedBufferedReader.END_OF_STREAM, i.e. indicates end of file
+     */
+    boolean isEndOfFile(int c) {
+        return c == ExtendedBufferedReader.END_OF_STREAM;
+    }
+
+    abstract Token nextToken(Token reusableToken) throws IOException;
+}