Merge pull request #295 from DamjanJovanovic/master
Add support for trailing text after the closing quote, and EOF without a final closing quote, for Excel compatibility
This commit is contained in:
commit
b1bdb99c42
|
@ -206,8 +206,12 @@ public final class CSVFormat implements Serializable {
|
||||||
return new Builder(csvFormat);
|
return new Builder(csvFormat);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean allowEofWithoutClosingQuote;
|
||||||
|
|
||||||
private boolean allowMissingColumnNames;
|
private boolean allowMissingColumnNames;
|
||||||
|
|
||||||
|
private boolean allowTrailingText;
|
||||||
|
|
||||||
private boolean autoFlush;
|
private boolean autoFlush;
|
||||||
|
|
||||||
private Character commentMarker;
|
private Character commentMarker;
|
||||||
|
@ -264,6 +268,8 @@ public final class CSVFormat implements Serializable {
|
||||||
this.autoFlush = csvFormat.autoFlush;
|
this.autoFlush = csvFormat.autoFlush;
|
||||||
this.quotedNullString = csvFormat.quotedNullString;
|
this.quotedNullString = csvFormat.quotedNullString;
|
||||||
this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
|
this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
|
||||||
|
this.allowTrailingText = csvFormat.allowTrailingText;
|
||||||
|
this.allowEofWithoutClosingQuote = csvFormat.allowEofWithoutClosingQuote;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -288,6 +294,19 @@ public final class CSVFormat implements Serializable {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets whether the last field on the last line, if quoted, may be missing its closing quote when the file ends: {@code true} to allow this,
|
||||||
|
* or {@code false} to cause an {@link IOException} to be thrown.
|
||||||
|
*
|
||||||
|
* @param allowEofWithoutClosingQuote whether to allow the last field on the last line to have a missing closing quote when the file ends,
|
||||||
|
* {@code true} if so, or {@code false} to cause an {@link IOException} to be thrown.
|
||||||
|
* @return This instance.
|
||||||
|
* @since 1.10.0
|
||||||
|
*/
|
||||||
|
public Builder setAllowEofWithoutClosingQuote(final boolean allowEofWithoutClosingQuote) {
|
||||||
|
this.allowEofWithoutClosingQuote = allowEofWithoutClosingQuote;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
|
* Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
|
||||||
* {@link IllegalArgumentException} to be thrown.
|
* {@link IllegalArgumentException} to be thrown.
|
||||||
|
@ -301,6 +320,20 @@ public final class CSVFormat implements Serializable {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets whether to allow trailing text in a quoted field, after the closing quote.
|
||||||
|
*
|
||||||
|
* @param allowTrailingText the trailing text behavior, {@code true} to append that text to the field contents, {@code false} to throw
|
||||||
|
* an {@link IOException}.
|
||||||
|
*
|
||||||
|
* @return This instance.
|
||||||
|
* @since 1.10.0
|
||||||
|
*/
|
||||||
|
public Builder setAllowTrailingText(final boolean allowTrailingText) {
|
||||||
|
this.allowTrailingText = allowTrailingText;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets whether to flush on close.
|
* Sets whether to flush on close.
|
||||||
*
|
*
|
||||||
|
@ -810,7 +843,7 @@ public final class CSVFormat implements Serializable {
|
||||||
* @see Predefined#Default
|
* @see Predefined#Default
|
||||||
*/
|
*/
|
||||||
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
|
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
|
||||||
false, false, false, DuplicateHeaderMode.ALLOW_ALL);
|
false, false, false, DuplicateHeaderMode.ALLOW_ALL, false, false);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
|
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
|
||||||
|
@ -834,6 +867,8 @@ public final class CSVFormat implements Serializable {
|
||||||
* <li>{@code setIgnoreEmptyLines(false)}</li>
|
* <li>{@code setIgnoreEmptyLines(false)}</li>
|
||||||
* <li>{@code setAllowMissingColumnNames(true)}</li>
|
* <li>{@code setAllowMissingColumnNames(true)}</li>
|
||||||
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
|
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
|
||||||
|
* <li>{@code setAllowTrailingText(true)}</li>
|
||||||
|
* <li>{@code setAllowEofWithoutClosingQuote(true)}</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>
|
* <p>
|
||||||
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
|
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
|
||||||
|
@ -846,6 +881,8 @@ public final class CSVFormat implements Serializable {
|
||||||
public static final CSVFormat EXCEL = DEFAULT.builder()
|
public static final CSVFormat EXCEL = DEFAULT.builder()
|
||||||
.setIgnoreEmptyLines(false)
|
.setIgnoreEmptyLines(false)
|
||||||
.setAllowMissingColumnNames(true)
|
.setAllowMissingColumnNames(true)
|
||||||
|
.setAllowTrailingText(true)
|
||||||
|
.setAllowEofWithoutClosingQuote(true)
|
||||||
.build();
|
.build();
|
||||||
// @formatter:on
|
// @formatter:on
|
||||||
|
|
||||||
|
@ -1268,7 +1305,7 @@ public final class CSVFormat implements Serializable {
|
||||||
*/
|
*/
|
||||||
public static CSVFormat newFormat(final char delimiter) {
|
public static CSVFormat newFormat(final char delimiter) {
|
||||||
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
|
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
|
||||||
DuplicateHeaderMode.ALLOW_ALL);
|
DuplicateHeaderMode.ALLOW_ALL, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static String[] toStringArray(final Object[] values) {
|
static String[] toStringArray(final Object[] values) {
|
||||||
|
@ -1310,8 +1347,12 @@ public final class CSVFormat implements Serializable {
|
||||||
|
|
||||||
private final DuplicateHeaderMode duplicateHeaderMode;
|
private final DuplicateHeaderMode duplicateHeaderMode;
|
||||||
|
|
||||||
|
private final boolean allowEofWithoutClosingQuote;
|
||||||
|
|
||||||
private final boolean allowMissingColumnNames;
|
private final boolean allowMissingColumnNames;
|
||||||
|
|
||||||
|
private final boolean allowTrailingText;
|
||||||
|
|
||||||
private final boolean autoFlush;
|
private final boolean autoFlush;
|
||||||
|
|
||||||
private final Character commentMarker; // null if commenting is disabled
|
private final Character commentMarker; // null if commenting is disabled
|
||||||
|
@ -1366,6 +1407,8 @@ public final class CSVFormat implements Serializable {
|
||||||
this.autoFlush = builder.autoFlush;
|
this.autoFlush = builder.autoFlush;
|
||||||
this.quotedNullString = builder.quotedNullString;
|
this.quotedNullString = builder.quotedNullString;
|
||||||
this.duplicateHeaderMode = builder.duplicateHeaderMode;
|
this.duplicateHeaderMode = builder.duplicateHeaderMode;
|
||||||
|
this.allowTrailingText = builder.allowTrailingText;
|
||||||
|
this.allowEofWithoutClosingQuote = builder.allowEofWithoutClosingQuote;
|
||||||
validate();
|
validate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1396,7 +1439,7 @@ public final class CSVFormat implements Serializable {
|
||||||
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
|
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
|
||||||
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
|
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
|
||||||
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
|
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
|
||||||
final DuplicateHeaderMode duplicateHeaderMode) {
|
final DuplicateHeaderMode duplicateHeaderMode, final boolean allowTrailingText, final boolean allowEofWithoutClosingQuote) {
|
||||||
this.delimiter = delimiter;
|
this.delimiter = delimiter;
|
||||||
this.quoteCharacter = quoteChar;
|
this.quoteCharacter = quoteChar;
|
||||||
this.quoteMode = quoteMode;
|
this.quoteMode = quoteMode;
|
||||||
|
@ -1416,6 +1459,8 @@ public final class CSVFormat implements Serializable {
|
||||||
this.autoFlush = autoFlush;
|
this.autoFlush = autoFlush;
|
||||||
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
|
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
|
||||||
this.duplicateHeaderMode = duplicateHeaderMode;
|
this.duplicateHeaderMode = duplicateHeaderMode;
|
||||||
|
this.allowTrailingText = allowTrailingText;
|
||||||
|
this.allowEofWithoutClosingQuote = allowEofWithoutClosingQuote;
|
||||||
validate();
|
validate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1469,7 +1514,8 @@ public final class CSVFormat implements Serializable {
|
||||||
ignoreHeaderCase == other.ignoreHeaderCase && ignoreSurroundingSpaces == other.ignoreSurroundingSpaces &&
|
ignoreHeaderCase == other.ignoreHeaderCase && ignoreSurroundingSpaces == other.ignoreSurroundingSpaces &&
|
||||||
Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode &&
|
Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode &&
|
||||||
Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) &&
|
Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) &&
|
||||||
skipHeaderRecord == other.skipHeaderRecord && trailingDelimiter == other.trailingDelimiter && trim == other.trim;
|
skipHeaderRecord == other.skipHeaderRecord && trailingDelimiter == other.trailingDelimiter && trim == other.trim &&
|
||||||
|
allowTrailingText == other.allowTrailingText && allowEofWithoutClosingQuote == other.allowEofWithoutClosingQuote;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1503,6 +1549,16 @@ public final class CSVFormat implements Serializable {
|
||||||
return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
|
return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets whether the file can end before the last field on the last line, if quoted, has a closing quote.
|
||||||
|
*
|
||||||
|
* @return {@code true} if so, {@code false} to throw an {@link IOException}.
|
||||||
|
* @since 1.10.0
|
||||||
|
*/
|
||||||
|
public boolean getAllowEofWithoutClosingQuote() {
|
||||||
|
return allowEofWithoutClosingQuote;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets whether missing column names are allowed when parsing the header line.
|
* Gets whether missing column names are allowed when parsing the header line.
|
||||||
*
|
*
|
||||||
|
@ -1512,6 +1568,16 @@ public final class CSVFormat implements Serializable {
|
||||||
return allowMissingColumnNames;
|
return allowMissingColumnNames;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets whether quoted fields allow trailing text after the closing quote.
|
||||||
|
*
|
||||||
|
* @return {@code true} if allowed, {@code false} to throw an {@link IOException}.
|
||||||
|
* @since 1.10.0
|
||||||
|
*/
|
||||||
|
public boolean getAllowTrailingText() {
|
||||||
|
return allowTrailingText;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets whether to flush on close.
|
* Gets whether to flush on close.
|
||||||
*
|
*
|
||||||
|
@ -1692,9 +1758,9 @@ public final class CSVFormat implements Serializable {
|
||||||
int result = 1;
|
int result = 1;
|
||||||
result = prime * result + Arrays.hashCode(headers);
|
result = prime * result + Arrays.hashCode(headers);
|
||||||
result = prime * result + Arrays.hashCode(headerComments);
|
result = prime * result + Arrays.hashCode(headerComments);
|
||||||
return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
|
return prime * result + Objects.hash(duplicateHeaderMode, allowEofWithoutClosingQuote, allowMissingColumnNames, allowTrailingText,
|
||||||
ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
|
autoFlush, commentMarker, delimiter, escapeCharacter, ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces,
|
||||||
skipHeaderRecord, trailingDelimiter, trim);
|
nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator, skipHeaderRecord, trailingDelimiter, trim);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -57,6 +57,8 @@ final class Lexer implements Closeable {
|
||||||
|
|
||||||
private final boolean ignoreSurroundingSpaces;
|
private final boolean ignoreSurroundingSpaces;
|
||||||
private final boolean ignoreEmptyLines;
|
private final boolean ignoreEmptyLines;
|
||||||
|
private final boolean allowTrailingText;
|
||||||
|
private final boolean allowEofWithoutClosingQuote;
|
||||||
|
|
||||||
/** The input stream */
|
/** The input stream */
|
||||||
private final ExtendedBufferedReader reader;
|
private final ExtendedBufferedReader reader;
|
||||||
|
@ -72,6 +74,8 @@ final class Lexer implements Closeable {
|
||||||
this.commentStart = mapNullToDisabled(format.getCommentMarker());
|
this.commentStart = mapNullToDisabled(format.getCommentMarker());
|
||||||
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
|
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
|
||||||
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
|
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
|
||||||
|
this.allowTrailingText = format.getAllowTrailingText();
|
||||||
|
this.allowEofWithoutClosingQuote = format.getAllowEofWithoutClosingQuote();
|
||||||
this.delimiterBuf = new char[delimiter.length - 1];
|
this.delimiterBuf = new char[delimiter.length - 1];
|
||||||
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
|
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
|
||||||
}
|
}
|
||||||
|
@ -364,17 +368,27 @@ final class Lexer implements Closeable {
|
||||||
token.type = EORECORD;
|
token.type = EORECORD;
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
if (!Character.isWhitespace((char)c)) {
|
if (allowTrailingText) {
|
||||||
// error invalid char between token and next delimiter
|
token.content.append((char) c);
|
||||||
throw new IOException("(line " + getCurrentLineNumber() +
|
} else {
|
||||||
") invalid char between encapsulated token and delimiter");
|
if (!Character.isWhitespace((char)c)) {
|
||||||
|
// error invalid char between token and next delimiter
|
||||||
|
throw new IOException("(line " + getCurrentLineNumber() +
|
||||||
|
") invalid char between encapsulated token and delimiter");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (isEndOfFile(c)) {
|
} else if (isEndOfFile(c)) {
|
||||||
// error condition (end of file before end of token)
|
if (allowEofWithoutClosingQuote) {
|
||||||
throw new IOException("(startline " + startLineNumber +
|
token.type = EOF;
|
||||||
") EOF reached before encapsulated token finished");
|
token.isReady = true; // There is data at EOF
|
||||||
|
return token;
|
||||||
|
} else {
|
||||||
|
// error condition (end of file before end of token)
|
||||||
|
throw new IOException("(startline " + startLineNumber +
|
||||||
|
") EOF reached before encapsulated token finished");
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// consume character
|
// consume character
|
||||||
token.content.append((char) c);
|
token.content.append((char) c);
|
||||||
|
|
|
@ -431,4 +431,30 @@ public class LexerTest {
|
||||||
lexer.trimTrailingSpaces(buffer);
|
lexer.trimTrailingSpaces(buffer);
|
||||||
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
|
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrailingTextAfterQuote() throws Exception {
|
||||||
|
final String code = "\"a\" b,\"a\" \" b,\"a\" b \"\"";
|
||||||
|
try (final Lexer parser = createLexer(code, CSVFormat.Builder.create().setAllowTrailingText(true).build())) {
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a b"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a \" b"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(EOF, "a b \"\""));
|
||||||
|
}
|
||||||
|
try (final Lexer parser = createLexer(code, CSVFormat.Builder.create().setAllowTrailingText(false).build())) {
|
||||||
|
assertThrows(IOException.class, () -> parser.nextToken(new Token()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEOFWithoutClosingQuote() throws Exception {
|
||||||
|
final String code = "a,\"b";
|
||||||
|
try (final Lexer parser = createLexer(code, CSVFormat.Builder.create().setAllowEofWithoutClosingQuote(true).build())) {
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(EOF, "b"));
|
||||||
|
}
|
||||||
|
try (final Lexer parser = createLexer(code, CSVFormat.Builder.create().setAllowEofWithoutClosingQuote(false).build())) {
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
|
||||||
|
assertThrows(IOException.class, () -> parser.nextToken(new Token()));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue