From 3ac702b190fd04c56118cb03aa87577f0a6a86f7 Mon Sep 17 00:00:00 2001 From: belugabehr <12578579+belugabehr@users.noreply.github.com> Date: Thu, 15 Jul 2021 09:41:22 -0400 Subject: [PATCH] CSV-278: Reuse Buffers in Lexer for Delimiter Detection (#162) * CSV-278: Reuse Buffers in Lexer for Delimiter Detection * Remove erroneous tab character * Reduce change set with fewer formatting changes * Reduce change set with fewer formatting changes --- .../commons/csv/ExtendedBufferedReader.java | 15 +++++++++++ .../java/org/apache/commons/csv/Lexer.java | 26 ++++++++++--------- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java index 63efb640..4b3fb483 100644 --- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java @@ -132,6 +132,21 @@ final class ExtendedBufferedReader extends BufferedReader { */ char[] lookAhead(final int n) throws IOException { final char[] buf = new char[n]; + return lookAhead(buf); + } + + /** + * Populates the buffer with the next {@code buf.length} characters in the + * current reader without consuming them. The next call to {@link #read()} will + * still return the next value. This doesn't affect line number or last + * character. + * + * @param buf the buffer to fill for the look ahead. 
+ * @return the buffer itself; if fewer than {@code buf.length} characters are available, the remaining elements are left unchanged + * @throws IOException If an I/O error occurs + */ + char[] lookAhead(final char[] buf) throws IOException { + final int n = buf.length; super.mark(n); super.read(buf, 0, n); super.reset(); diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java index cfd64fcd..a8afc501 100644 --- a/src/main/java/org/apache/commons/csv/Lexer.java +++ b/src/main/java/org/apache/commons/csv/Lexer.java @@ -49,6 +49,8 @@ final class Lexer implements Closeable { private static final char DISABLED = '\ufffe'; private final char[] delimiter; + private final char[] delimiterBuf; + private final char[] escapeDelimiterBuf; private final char escape; private final char quoteChar; private final char commentStart; @@ -68,6 +70,8 @@ final class Lexer implements Closeable { this.commentStart = mapNullToDisabled(format.getCommentMarker()); this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces(); this.ignoreEmptyLines = format.getIgnoreEmptyLines(); + this.delimiterBuf = new char[delimiter.length - 1]; + this.escapeDelimiterBuf = new char[2 * delimiter.length - 1]; } /** @@ -112,7 +116,7 @@ final class Lexer implements Closeable { } /** - * Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(int)} + * Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}. * * @param ch * the current character. 
@@ -126,14 +130,13 @@ final class Lexer implements Closeable { if (delimiter.length == 1) { return true; } - final int len = delimiter.length - 1; - final char[] buf = reader.lookAhead(len); - for (int i = 0; i < len; i++) { - if (buf[i] != delimiter[i+1]) { + reader.lookAhead(delimiterBuf); + for (int i = 0; i < delimiterBuf.length; i++) { + if (delimiterBuf[i] != delimiter[i+1]) { return false; } } - final int count = reader.read(buf, 0, len); + final int count = reader.read(delimiterBuf, 0, delimiterBuf.length); return count != END_OF_STREAM; } @@ -156,7 +159,7 @@ final class Lexer implements Closeable { } /** - * Tests if the next characters constitute a escape delimiter through {@link ExtendedBufferedReader#lookAhead(int)}. + * Tests if the next characters constitute an escape delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}. * * For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]". * @@ -164,17 +167,16 @@ final class Lexer implements Closeable { * @throws IOException If an I/O error occurs. */ boolean isEscapeDelimiter() throws IOException { - final int len = 2 * delimiter.length - 1; - final char[] buf = reader.lookAhead(len); - if (buf[0] != delimiter[0]) { + reader.lookAhead(escapeDelimiterBuf); + if (escapeDelimiterBuf[0] != delimiter[0]) { return false; } for (int i = 1; i < delimiter.length; i++) { - if (buf[2 * i] != delimiter[i] || buf[2 * i - 1] != escape) { + if (escapeDelimiterBuf[2 * i] != delimiter[i] || escapeDelimiterBuf[2 * i - 1] != escape) { return false; } } - final int count = reader.read(buf, 0, len); + final int count = reader.read(escapeDelimiterBuf, 0, escapeDelimiterBuf.length); return count != END_OF_STREAM; }