CSV-278: Reuse Buffers in Lexer for Delimiter Detection (#162)

* CSV-278: Reuse Buffers in Lexer for Delimiter Detection

* Remove erroneous tab character

* Reduce change set with fewer formatting changes

* Reduce change set with fewer formatting changes
This commit is contained in:
belugabehr 2021-07-15 09:41:22 -04:00 committed by GitHub
parent a4e005fdf5
commit 3ac702b190
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 12 deletions

View File

@ -132,6 +132,21 @@ final class ExtendedBufferedReader extends BufferedReader {
*/ */
char[] lookAhead(final int n) throws IOException { char[] lookAhead(final int n) throws IOException {
final char[] buf = new char[n]; final char[] buf = new char[n];
return lookAhead(buf);
}
/**
* Populates the buffer with the next {@code buf.length} characters in the
* current reader without consuming them. The next call to {@link #read()} will
* still return the next value. This doesn't affect the line number or the last
* character.
*
* @param buf the buffer to fill for the look ahead.
* @return the buffer itself
* @throws IOException If an I/O error occurs
*/
char[] lookAhead(final char[] buf) throws IOException {
final int n = buf.length;
super.mark(n); super.mark(n);
super.read(buf, 0, n); super.read(buf, 0, n);
super.reset(); super.reset();

View File

@ -49,6 +49,8 @@ final class Lexer implements Closeable {
private static final char DISABLED = '\ufffe'; private static final char DISABLED = '\ufffe';
private final char[] delimiter; private final char[] delimiter;
private final char[] delimiterBuf;
private final char[] escapeDelimiterBuf;
private final char escape; private final char escape;
private final char quoteChar; private final char quoteChar;
private final char commentStart; private final char commentStart;
@ -68,6 +70,8 @@ final class Lexer implements Closeable {
this.commentStart = mapNullToDisabled(format.getCommentMarker()); this.commentStart = mapNullToDisabled(format.getCommentMarker());
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces(); this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
this.ignoreEmptyLines = format.getIgnoreEmptyLines(); this.ignoreEmptyLines = format.getIgnoreEmptyLines();
this.delimiterBuf = new char[delimiter.length - 1];
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
} }
/** /**
@ -112,7 +116,7 @@ final class Lexer implements Closeable {
} }
/** /**
* Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(int)} * Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
* *
* @param ch * @param ch
* the current character. * the current character.
@ -126,14 +130,13 @@ final class Lexer implements Closeable {
if (delimiter.length == 1) { if (delimiter.length == 1) {
return true; return true;
} }
final int len = delimiter.length - 1; reader.lookAhead(delimiterBuf);
final char[] buf = reader.lookAhead(len); for (int i = 0; i < delimiterBuf.length; i++) {
for (int i = 0; i < len; i++) { if (delimiterBuf[i] != delimiter[i+1]) {
if (buf[i] != delimiter[i+1]) {
return false; return false;
} }
} }
final int count = reader.read(buf, 0, len); final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
return count != END_OF_STREAM; return count != END_OF_STREAM;
} }
@ -156,7 +159,7 @@ final class Lexer implements Closeable {
} }
/** /**
* Tests if the next characters constitute an escape delimiter through {@link ExtendedBufferedReader#lookAhead(int)}. * Tests if the next characters constitute an escape delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
* *
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]". * For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
* *
@ -164,17 +167,16 @@ final class Lexer implements Closeable {
* @throws IOException If an I/O error occurs. * @throws IOException If an I/O error occurs.
*/ */
boolean isEscapeDelimiter() throws IOException { boolean isEscapeDelimiter() throws IOException {
final int len = 2 * delimiter.length - 1; reader.lookAhead(escapeDelimiterBuf);
final char[] buf = reader.lookAhead(len); if (escapeDelimiterBuf[0] != delimiter[0]) {
if (buf[0] != delimiter[0]) {
return false; return false;
} }
for (int i = 1; i < delimiter.length; i++) { for (int i = 1; i < delimiter.length; i++) {
if (buf[2 * i] != delimiter[i] || buf[2 * i - 1] != escape) { if (escapeDelimiterBuf[2 * i] != delimiter[i] || escapeDelimiterBuf[2 * i - 1] != escape) {
return false; return false;
} }
} }
final int count = reader.read(buf, 0, len); final int count = reader.read(escapeDelimiterBuf, 0, escapeDelimiterBuf.length);
return count != END_OF_STREAM; return count != END_OF_STREAM;
} }