CSV-278: Reuse Buffers in Lexer for Delimiter Detection (#162)
* CSV-278: Reuse Buffers in Lexer for Delimiter Detection
* Remove erroneous tab character
* Reduce change set with fewer formatting changes
* Reduce change set with fewer formatting changes
This commit is contained in:
parent
a4e005fdf5
commit
3ac702b190
|
@ -132,6 +132,21 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||||
*/
|
*/
|
||||||
char[] lookAhead(final int n) throws IOException {
|
char[] lookAhead(final int n) throws IOException {
|
||||||
final char[] buf = new char[n];
|
final char[] buf = new char[n];
|
||||||
|
return lookAhead(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Populates the buffer with the next {@code buf.length} characters in the
|
||||||
|
* current reader without consuming them. The next call to {@link #read()} will
|
||||||
|
* still return the next value. This doesn't affect line number or last
|
||||||
|
* character.
|
||||||
|
*
|
||||||
|
* @param buf the buffer to fill for the look ahead.
|
||||||
|
* @return the buffer itself
|
||||||
|
* @throws IOException If an I/O error occurs
|
||||||
|
*/
|
||||||
|
char[] lookAhead(final char[] buf) throws IOException {
|
||||||
|
final int n = buf.length;
|
||||||
super.mark(n);
|
super.mark(n);
|
||||||
super.read(buf, 0, n);
|
super.read(buf, 0, n);
|
||||||
super.reset();
|
super.reset();
|
||||||
|
|
|
@ -49,6 +49,8 @@ final class Lexer implements Closeable {
|
||||||
private static final char DISABLED = '\ufffe';
|
private static final char DISABLED = '\ufffe';
|
||||||
|
|
||||||
private final char[] delimiter;
|
private final char[] delimiter;
|
||||||
|
private final char[] delimiterBuf;
|
||||||
|
private final char[] escapeDelimiterBuf;
|
||||||
private final char escape;
|
private final char escape;
|
||||||
private final char quoteChar;
|
private final char quoteChar;
|
||||||
private final char commentStart;
|
private final char commentStart;
|
||||||
|
@ -68,6 +70,8 @@ final class Lexer implements Closeable {
|
||||||
this.commentStart = mapNullToDisabled(format.getCommentMarker());
|
this.commentStart = mapNullToDisabled(format.getCommentMarker());
|
||||||
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
|
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
|
||||||
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
|
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
|
||||||
|
this.delimiterBuf = new char[delimiter.length - 1];
|
||||||
|
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -112,7 +116,7 @@ final class Lexer implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(int)}
|
* Determine whether the next characters constitute a delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
|
||||||
*
|
*
|
||||||
* @param ch
|
* @param ch
|
||||||
* the current character.
|
* the current character.
|
||||||
|
@ -126,14 +130,13 @@ final class Lexer implements Closeable {
|
||||||
if (delimiter.length == 1) {
|
if (delimiter.length == 1) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
final int len = delimiter.length - 1;
|
reader.lookAhead(delimiterBuf);
|
||||||
final char[] buf = reader.lookAhead(len);
|
for (int i = 0; i < delimiterBuf.length; i++) {
|
||||||
for (int i = 0; i < len; i++) {
|
if (delimiterBuf[i] != delimiter[i+1]) {
|
||||||
if (buf[i] != delimiter[i+1]) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final int count = reader.read(buf, 0, len);
|
final int count = reader.read(delimiterBuf, 0, delimiterBuf.length);
|
||||||
return count != END_OF_STREAM;
|
return count != END_OF_STREAM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,7 +159,7 @@ final class Lexer implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests if the next characters constitute an escape delimiter through {@link ExtendedBufferedReader#lookAhead(int)}.
|
* Tests if the next characters constitute an escape delimiter through {@link ExtendedBufferedReader#lookAhead(char[])}.
|
||||||
*
|
*
|
||||||
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
|
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
|
||||||
*
|
*
|
||||||
|
@ -164,17 +167,16 @@ final class Lexer implements Closeable {
|
||||||
* @throws IOException If an I/O error occurs.
|
* @throws IOException If an I/O error occurs.
|
||||||
*/
|
*/
|
||||||
boolean isEscapeDelimiter() throws IOException {
|
boolean isEscapeDelimiter() throws IOException {
|
||||||
final int len = 2 * delimiter.length - 1;
|
reader.lookAhead(escapeDelimiterBuf);
|
||||||
final char[] buf = reader.lookAhead(len);
|
if (escapeDelimiterBuf[0] != delimiter[0]) {
|
||||||
if (buf[0] != delimiter[0]) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (int i = 1; i < delimiter.length; i++) {
|
for (int i = 1; i < delimiter.length; i++) {
|
||||||
if (buf[2 * i] != delimiter[i] || buf[2 * i - 1] != escape) {
|
if (escapeDelimiterBuf[2 * i] != delimiter[i] || escapeDelimiterBuf[2 * i - 1] != escape) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final int count = reader.read(buf, 0, len);
|
final int count = reader.read(escapeDelimiterBuf, 0, escapeDelimiterBuf.length);
|
||||||
return count != END_OF_STREAM;
|
return count != END_OF_STREAM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue