Add support for String delimiters #76.

Bump PMD core from 6.29.0 to 6.36.0. Fix rule set.
This commit is contained in:
Gary Gregory 2021-07-05 12:06:07 -04:00
parent b4b9a6bc0e
commit d8d080453e
10 changed files with 505 additions and 148 deletions

43
pom.xml
View File

@ -172,6 +172,7 @@
<checkstyle.resourceExcludes>LICENSE.txt, NOTICE.txt, **/maven-archiver/pom.properties</checkstyle.resourceExcludes>
<commons.pmd.version>3.14.0</commons.pmd.version>
<commons.pmd-impl.version>6.36.0</commons.pmd-impl.version>
<commons.jacoco.version>0.8.7</commons.jacoco.version>
<commons.spotbugs.version>4.2.3</commons.spotbugs.version>
<commons.japicmp.version>0.15.3</commons.japicmp.version>
@ -203,6 +204,30 @@
</dependency>
</dependencies>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-pmd-plugin</artifactId>
<version>${commons.pmd.version}</version>
<dependencies>
<dependency>
<groupId>net.sourceforge.pmd</groupId>
<artifactId>pmd-core</artifactId>
<version>${commons.pmd-impl.version}</version>
</dependency>
<dependency>
<groupId>net.sourceforge.pmd</groupId>
<artifactId>pmd-java</artifactId>
<version>${commons.pmd-impl.version}</version>
</dependency>
</dependencies>
<configuration>
<targetJdk>${maven.compiler.target}</targetJdk>
<skipEmptyReport>false</skipEmptyReport>
<rulesets>
<ruleset>${basedir}/src/site/resources/pmd/pmd-ruleset.xml</ruleset>
</rulesets>
</configuration>
</plugin>
</plugins>
</pluginManagement>
<plugins>
@ -250,15 +275,6 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-pmd-plugin</artifactId>
<version>${commons.pmd.version}</version>
<configuration>
<targetJdk>${maven.compiler.target}</targetJdk>
<skipEmptyReport>false</skipEmptyReport>
<analysisCache>true</analysisCache>
<rulesets>
<ruleset>${basedir}/src/site/resources/pmd/pmd-ruleset.xml</ruleset>
</rulesets>
</configuration>
</plugin>
<!-- We need to add our test data files to rat exclusions -->
@ -322,15 +338,6 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-pmd-plugin</artifactId>
<version>${commons.pmd.version}</version>
<configuration>
<targetJdk>${maven.compiler.target}</targetJdk>
<skipEmptyReport>false</skipEmptyReport>
<analysisCache>true</analysisCache>
<rulesets>
<ruleset>${basedir}/src/site/resources/pmd/pmd-ruleset.xml</ruleset>
</rulesets>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>

View File

@ -65,6 +65,7 @@
<action issue="CSV-184" type="add" dev="ggregory" due-to="Gaurav Agarwal, M. Steiger, Gary Gregory">Make the method CSVRecord.putIn(Map) public.</action>
<action type="add" dev="ggregory" due-to="dota17">Add test cases for CSVRecord with get(Enum) and toString. #54.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory, dota17">Add and use CSVFormat.Builder, deprecated CSVFormat#with methods, based on #73.</action>
<action issue="CSV-206" type="add" dev="ggregory" due-to="Gary Gregory, dota17">Add support for String delimiters #76.</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Gary Gregory">Update org.junit.jupiter:junit-jupiter from 5.6.0 to 5.7.0, #84 #109</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Update tests from Apache Commons Lang 3.9 to 3.12.0.</action>
@ -84,6 +85,7 @@
<action type="update" dev="ggregory" due-to="Dependabot">Bump commons.spotbugs.version from 4.0.4 to 4.2.3 (Java 16).</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons.javadoc.version from 3.2.0 to 3.3.0.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump jmh-generator-annprocess from 1.5.2 to 1.32 #151.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump PMD core from 6.29.0 to 6.36.0.</action>
</release>
<release version="1.8" date="2020-02-01" description="Feature and bug fix release (Java 8).

View File

@ -196,14 +196,14 @@ public final class CSVFormat implements Serializable {
private Character commentMarker;
private char delimiter;
private String delimiter;
private Character escapeCharacter;
private String[] headers;
private String[] headerComments;
private String[] headers;
private boolean ignoreEmptyLines;
private boolean ignoreHeaderCase;
@ -330,7 +330,17 @@ public final class CSVFormat implements Serializable {
* @return This instance.
*/
public Builder setDelimiter(final char delimiter) {
if (isLineBreak(delimiter)) {
return setDelimiter(String.valueOf(delimiter));
}
/**
* Sets the delimiter string.
*
* @param delimiter the delimiter string; must not contain a line break (CR or LF).
* @return This instance.
*/
public Builder setDelimiter(final String delimiter) {
if (containsLineBreak(delimiter)) {
throw new IllegalArgumentException("The delimiter cannot be a line break");
}
this.delimiter = delimiter;
@ -1146,12 +1156,34 @@ public final class CSVFormat implements Serializable {
return values == null ? null : values.clone();
}
/**
 * Tests whether the given string contains the given character.
 *
 * @param source the string to search, must not be null.
 * @param searchCh the character to look for.
 *
 * @return true if {@code searchCh} occurs at least once in {@code source}.
 * @throws NullPointerException if {@code source} is null.
 */
private static boolean contains(final String source, final char searchCh) {
    final String checked = Objects.requireNonNull(source, "source");
    return checked.indexOf(searchCh) != -1;
}
/**
 * Tests whether the given string contains a line break character (CR or LF).
 *
 * @param source the string to check.
 *
 * @return true if {@code source} contains a CR or an LF character.
 */
private static boolean containsLineBreak(final String source) {
    if (contains(source, CR)) {
        return true;
    }
    return contains(source, LF);
}
/**
* Returns true if the given character is a line break character.
*
* @param c the character to check
* @param c the character to check.
*
* @return true if {@code c} is a line break character
* @return true if {@code c} is a line break character.
*/
private static boolean isLineBreak(final char c) {
return c == LF || c == CR;
@ -1160,9 +1192,9 @@ public final class CSVFormat implements Serializable {
/**
* Returns true if the given character is a line break character.
*
* @param c the character to check, may be null
* @param c the character to check, may be null.
*
* @return true if {@code c} is a line break character (and not null)
* @return true if {@code c} is a line break character (and not null).
*/
private static boolean isLineBreak(final Character c) {
return c != null && isLineBreak(c.charValue());
@ -1186,7 +1218,8 @@ public final class CSVFormat implements Serializable {
* @see #TDF
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false, true);
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
true);
}
static String[] toStringArray(final Object[] values) {
@ -1195,8 +1228,7 @@ public final class CSVFormat implements Serializable {
}
final String[] strings = new String[values.length];
for (int i = 0; i < values.length; i++) {
final Object value = values[i];
strings[i] = value == null ? null : value.toString();
strings[i] = Objects.toString(values[i], null);
}
return strings;
}
@ -1237,7 +1269,7 @@ public final class CSVFormat implements Serializable {
private final Character commentMarker; // null if commenting is disabled
private final char delimiter;
private final String delimiter;
private final Character escapeCharacter; // null if escaping is disabled
@ -1312,7 +1344,7 @@ public final class CSVFormat implements Serializable {
* @param autoFlush TODO Doc me.
* @throws IllegalArgumentException if the delimiter is a line break character.
*/
private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape,
private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape,
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
@ -1353,30 +1385,18 @@ public final class CSVFormat implements Serializable {
if (this == obj) {
return true;
}
if ((obj == null) || (getClass() != obj.getClass())) {
if (obj == null || getClass() != obj.getClass()) {
return false;
}
final CSVFormat other = (CSVFormat) obj;
if ((delimiter != other.delimiter) || (trailingDelimiter != other.trailingDelimiter) || (autoFlush != other.autoFlush) || (trim != other.trim)) {
return false;
}
if ((allowMissingColumnNames != other.allowMissingColumnNames) || (allowDuplicateHeaderNames != other.allowDuplicateHeaderNames) ||
(ignoreHeaderCase != other.ignoreHeaderCase) || (quoteMode != other.quoteMode)) {
return false;
}
if (!Objects.equals(quoteCharacter, other.quoteCharacter) || !Objects.equals(commentMarker, other.commentMarker) ||
!Objects.equals(escapeCharacter, other.escapeCharacter) || !Objects.equals(nullString, other.nullString)) {
return false;
}
if (!Arrays.equals(header, other.header) || (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) ||
(ignoreEmptyLines != other.ignoreEmptyLines) || (skipHeaderRecord != other.skipHeaderRecord)) {
return false;
}
if (!Objects.equals(recordSeparator, other.recordSeparator) || !Arrays.equals(headerComments, other.headerComments)) {
return false;
}
return true;
return allowDuplicateHeaderNames == other.allowDuplicateHeaderNames && allowMissingColumnNames == other.allowMissingColumnNames &&
autoFlush == other.autoFlush && Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(header, other.header) &&
Arrays.equals(headerComments, other.headerComments) && ignoreEmptyLines == other.ignoreEmptyLines &&
ignoreHeaderCase == other.ignoreHeaderCase && ignoreSurroundingSpaces == other.ignoreSurroundingSpaces &&
Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode &&
Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) &&
skipHeaderRecord == other.skipHeaderRecord && trailingDelimiter == other.trailingDelimiter && trim == other.trim;
}
/**
@ -1437,11 +1457,22 @@ public final class CSVFormat implements Serializable {
}
/**
* Returns the character delimiting the values (typically ';', ',' or '\t').
* Returns the first character delimiting the values (typically ';', ',' or '\t').
*
* @return the delimiter character
* @return the first delimiter character.
* @deprecated Use {@link #getDelimiterString()}.
*/
@Deprecated
public char getDelimiter() {
return delimiter.charAt(0);
}
/**
* Returns the string delimiting the values (typically ";", "," or "\t").
*
* @return the delimiter string.
*/
public String getDelimiterString() {
return delimiter;
}
@ -1571,9 +1602,14 @@ public final class CSVFormat implements Serializable {
@Override
public int hashCode() {
return Objects.hash(delimiter, quoteMode, quoteCharacter, commentMarker, escapeCharacter, nullString, ignoreSurroundingSpaces, ignoreHeaderCase,
ignoreEmptyLines, skipHeaderRecord, allowDuplicateHeaderNames, trim, autoFlush, trailingDelimiter, allowMissingColumnNames, recordSeparator,
Arrays.hashCode(header), Arrays.hashCode(headerComments));
final int prime = 31;
int result = 1;
result = prime * result + Arrays.hashCode(header);
result = prime * result + Arrays.hashCode(headerComments);
result = prime * result + Objects.hash(allowDuplicateHeaderNames, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
skipHeaderRecord, trailingDelimiter, trim);
return result;
}
/**
@ -1587,6 +1623,37 @@ public final class CSVFormat implements Serializable {
return commentMarker != null;
}
/**
 * Tests whether the characters of {@code charSeq} beginning at {@code startIndex} spell out the delimiter.
 *
 * @param ch
 *            the current char; compared against the first delimiter char.
 * @param charSeq
 *            the char sequence to match against.
 * @param startIndex
 *            the index in {@code charSeq} where matching starts.
 * @param delimiter
 *            the delimiter chars.
 * @param delimiterLength
 *            the number of delimiter chars to match.
 * @return true if the match is successful.
 */
private boolean isDelimiter(final char ch, final CharSequence charSeq, final int startIndex, final char[] delimiter, final int delimiterLength) {
    // Reject early: wrong first char, or not enough input left to hold the whole delimiter.
    if (ch != delimiter[0] || startIndex + delimiterLength > charSeq.length()) {
        return false;
    }
    // The first char is already matched through ch; walk the remaining delimiter chars.
    int offset = 1;
    while (offset < delimiterLength && charSeq.charAt(startIndex + offset) == delimiter[offset]) {
        offset++;
    }
    // The loop only runs to the end when every remaining char matched.
    return offset >= delimiterLength;
}
/**
* Returns whether escape are being processed.
*
@ -1702,7 +1769,7 @@ public final class CSVFormat implements Serializable {
final int offset = 0;
final int len = value.length();
if (!newRecord) {
out.append(getDelimiter());
out.append(getDelimiterString());
}
if (object == null) {
out.append(value);
@ -1737,7 +1804,7 @@ public final class CSVFormat implements Serializable {
private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException {
// Reader is never null
if (!newRecord) {
out.append(getDelimiter());
out.append(getDelimiterString());
}
if (isQuoteCharacterSet()) {
printWithQuotes(reader, out);
@ -1769,16 +1836,16 @@ public final class CSVFormat implements Serializable {
/**
* Outputs the trailing delimiter (if set) followed by the record separator (if set).
*
* @param out where to write
* @param appendable where to write
* @throws IOException If an I/O error occurs
* @since 1.4
*/
public void println(final Appendable out) throws IOException {
public void println(final Appendable appendable) throws IOException {
if (getTrailingDelimiter()) {
out.append(getDelimiter());
appendable.append(getDelimiterString());
}
if (recordSeparator != null) {
out.append(recordSeparator);
appendable.append(recordSeparator);
}
}
@ -1790,35 +1857,37 @@ public final class CSVFormat implements Serializable {
* the record, so there is no need to call {@link #println(Appendable)}.
* </p>
*
* @param out where to write.
* @param appendable where to write.
* @param values values to output.
* @throws IOException If an I/O error occurs.
* @since 1.4
*/
public void printRecord(final Appendable out, final Object... values) throws IOException {
public void printRecord(final Appendable appendable, final Object... values) throws IOException {
for (int i = 0; i < values.length; i++) {
print(values[i], out, i == 0);
print(values[i], appendable, i == 0);
}
println(out);
println(appendable);
}
/*
* Note: must only be called if escaping is enabled, otherwise will generate NPE
* Note: Must only be called if escaping is enabled, otherwise will generate NPE.
*/
private void printWithEscapes(final CharSequence value, final Appendable out) throws IOException {
private void printWithEscapes(final CharSequence charSeq, final Appendable appendable) throws IOException {
int start = 0;
int pos = 0;
final int end = value.length();
final int end = charSeq.length();
final char delim = getDelimiter();
final char[] delim = getDelimiterString().toCharArray();
final int delimLength = delim.length;
final char escape = getEscapeCharacter().charValue();
while (pos < end) {
char c = value.charAt(pos);
if (c == CR || c == LF || c == delim || c == escape) {
char c = charSeq.charAt(pos);
boolean isDelimiterStart = isDelimiter(c, charSeq, pos, delim, delimLength);
if (c == CR || c == LF || c == escape || isDelimiterStart) {
// write out segment up until this char
if (pos > start) {
out.append(value, start, pos);
appendable.append(charSeq, start, pos);
}
if (c == LF) {
c = 'n';
@ -1826,8 +1895,17 @@ public final class CSVFormat implements Serializable {
c = 'r';
}
out.append(escape);
out.append(c);
appendable.append(escape);
appendable.append(c);
if (isDelimiterStart) {
for (int i = 1; i < delimLength; i++) {
pos++;
c = charSeq.charAt(pos);
appendable.append(escape);
appendable.append(c);
}
}
start = pos + 1; // start on the current char after this one
}
@ -1836,7 +1914,7 @@ public final class CSVFormat implements Serializable {
// write last segment
if (pos > start) {
out.append(value, start, pos);
appendable.append(charSeq, start, pos);
}
}
@ -1844,14 +1922,19 @@ public final class CSVFormat implements Serializable {
int start = 0;
int pos = 0;
final char delim = getDelimiter();
@SuppressWarnings("resource") // Temp reader on input reader.
final ExtendedBufferedReader bufferedReader = new ExtendedBufferedReader(reader);
final char[] delim = getDelimiterString().toCharArray();
final int delimLength = delim.length;
final char escape = getEscapeCharacter().charValue();
final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE);
int c;
while (-1 != (c = reader.read())) {
while (-1 != (c = bufferedReader.read())) {
builder.append((char) c);
if (c == CR || c == LF || c == delim || c == escape) {
boolean isDelimiterStart = isDelimiter((char) c, builder.toString() + new String(bufferedReader.lookAhead(delimLength - 1)), pos, delim,
delimLength);
if (c == CR || c == LF || c == escape || isDelimiterStart) {
// write out segment up until this char
if (pos > start) {
out.append(builder.substring(start, pos));
@ -1867,6 +1950,14 @@ public final class CSVFormat implements Serializable {
out.append(escape);
out.append((char) c);
if (isDelimiterStart) {
for (int i = 1; i < delimLength; i++) {
c = bufferedReader.read();
out.append(escape);
out.append((char) c);
}
}
start = pos + 1; // start on the current char after this one
}
pos++;
@ -1882,13 +1973,14 @@ public final class CSVFormat implements Serializable {
* Note: must only be called if quoting is enabled, otherwise will generate NPE
*/
// the original object is needed so can check for Number
private void printWithQuotes(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) throws IOException {
private void printWithQuotes(final Object object, final CharSequence charSeq, final Appendable out, final boolean newRecord) throws IOException {
boolean quote = false;
int start = 0;
int pos = 0;
final int len = value.length();
final int len = charSeq.length();
final char delimChar = getDelimiter();
final char[] delim = getDelimiterString().toCharArray();
final int delimLength = delim.length;
final char quoteChar = getQuoteCharacter().charValue();
// If escape char not specified, default to the quote char
// This avoids having to keep checking whether there is an escape character
@ -1909,7 +2001,7 @@ public final class CSVFormat implements Serializable {
break;
case NONE:
// Use the existing escaping code
printWithEscapes(value, out);
printWithEscapes(charSeq, out);
return;
case MINIMAL:
if (len <= 0) {
@ -1921,7 +2013,7 @@ public final class CSVFormat implements Serializable {
quote = true;
}
} else {
char c = value.charAt(pos);
char c = charSeq.charAt(pos);
if (c <= COMMENT) {
// Some other chars at the start of a value caused the parser to fail, so for now
@ -1930,8 +2022,8 @@ public final class CSVFormat implements Serializable {
quote = true;
} else {
while (pos < len) {
c = value.charAt(pos);
if (c == LF || c == CR || c == quoteChar || c == delimChar || c == escapeChar) {
c = charSeq.charAt(pos);
if (c == LF || c == CR || c == quoteChar || c == escapeChar || isDelimiter(c, charSeq, pos, delim, delimLength)) {
quote = true;
break;
}
@ -1940,7 +2032,7 @@ public final class CSVFormat implements Serializable {
if (!quote) {
pos = len - 1;
c = value.charAt(pos);
c = charSeq.charAt(pos);
// Some other chars at the end caused the parser to fail, so for now
// encapsulate if we end in anything less than ' '
if (c <= SP) {
@ -1952,7 +2044,7 @@ public final class CSVFormat implements Serializable {
if (!quote) {
// no encapsulation needed - write out the original value
out.append(value, start, len);
out.append(charSeq, start, len);
return;
}
break;
@ -1962,7 +2054,7 @@ public final class CSVFormat implements Serializable {
if (!quote) {
// no encapsulation needed - write out the original value
out.append(value, start, len);
out.append(charSeq, start, len);
return;
}
@ -1972,10 +2064,10 @@ public final class CSVFormat implements Serializable {
// Pick up where we left off: pos should be positioned on the first character that caused
// the need for encapsulation.
while (pos < len) {
final char c = value.charAt(pos);
final char c = charSeq.charAt(pos);
if (c == quoteChar || c == escapeChar) {
// write out the chunk up until this point
out.append(value, start, pos);
out.append(charSeq, start, pos);
out.append(escapeChar); // now output the escape
start = pos; // and restart with the matched char
}
@ -1983,7 +2075,7 @@ public final class CSVFormat implements Serializable {
}
// write the last segment
out.append(value, start, pos);
out.append(charSeq, start, pos);
out.append(quoteChar);
}
@ -1992,10 +2084,10 @@ public final class CSVFormat implements Serializable {
*
* @throws IOException If an I/O error occurs
*/
private void printWithQuotes(final Reader reader, final Appendable out) throws IOException {
private void printWithQuotes(final Reader reader, final Appendable appendable) throws IOException {
if (getQuoteMode() == QuoteMode.NONE) {
printWithEscapes(reader, out);
printWithEscapes(reader, appendable);
return;
}
@ -2004,7 +2096,7 @@ public final class CSVFormat implements Serializable {
final char quote = getQuoteCharacter().charValue();
final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE);
out.append(quote);
appendable.append(quote);
int c;
while (-1 != (c = reader.read())) {
@ -2012,23 +2104,23 @@ public final class CSVFormat implements Serializable {
if (c == quote) {
// write out segment up until this char
if (pos > 0) {
out.append(builder.substring(0, pos));
appendable.append(builder.substring(0, pos));
builder.setLength(0);
pos = -1;
}
out.append(quote);
out.append((char) c);
appendable.append(quote);
appendable.append((char) c);
}
pos++;
}
// write last segment
if (pos > 0) {
out.append(builder.substring(0, pos));
appendable.append(builder.substring(0, pos));
}
out.append(quote);
appendable.append(quote);
}
@Override
@ -2086,19 +2178,19 @@ public final class CSVFormat implements Serializable {
* @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes.
*/
private void validate() throws IllegalArgumentException {
if (isLineBreak(delimiter)) {
if (containsLineBreak(delimiter)) {
throw new IllegalArgumentException("The delimiter cannot be a line break");
}
if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) {
if (quoteCharacter != null && contains(delimiter, quoteCharacter.charValue())) {
throw new IllegalArgumentException("The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')");
}
if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) {
if (escapeCharacter != null && contains(delimiter, escapeCharacter.charValue())) {
throw new IllegalArgumentException("The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')");
}
if (commentMarker != null && delimiter == commentMarker.charValue()) {
if (commentMarker != null && contains(delimiter, commentMarker.charValue())) {
throw new IllegalArgumentException("The comment start character and the delimiter cannot be the same ('" + commentMarker + "')");
}

View File

@ -26,7 +26,7 @@ final class Constants {
static final char BACKSPACE = '\b';
static final char COMMA = ',';
static final String COMMA = ",";
/**
* Starts a comment, the remainder of the line is the comment.

View File

@ -112,7 +112,7 @@ final class ExtendedBufferedReader extends BufferedReader {
* @return the next character
*
* @throws IOException
* if there is an error in reading
* If an I/O error occurs
*/
int lookAhead() throws IOException {
super.mark(1);
@ -122,6 +122,23 @@ final class ExtendedBufferedReader extends BufferedReader {
return c;
}
/**
 * Returns the next n characters in the current reader without consuming them. The next call to {@link #read()} will still return the next value. This
 * doesn't affect line number or last character.
 * <p>
 * If the end of the stream is reached before n characters are available, the remaining positions of the returned array keep their default value
 * ({@code '\0'}).
 * </p>
 *
 * @param n the number of characters to look ahead.
 * @return a new array of length n holding the next characters.
 * @throws IOException If an I/O error occurs
 */
char[] lookAhead(final int n) throws IOException {
    final char[] buf = new char[n];
    super.mark(n);
    // A single bulk read may legitimately return fewer than n characters (for example when the underlying
    // stream is not ready yet), so keep reading until the buffer is full or the stream ends.
    int total = 0;
    while (total < n) {
        final int count = super.read(buf, total, n - total);
        if (count < 0) {
            break;
        }
        total += count;
    }
    super.reset();
    return buf;
}
@Override
public int read() throws IOException {
final int current = super.read();
@ -130,7 +147,7 @@ final class ExtendedBufferedReader extends BufferedReader {
eolCounter++;
}
lastChar = current;
this.position++;
position++;
return lastChar;
}

View File

@ -48,7 +48,7 @@ final class Lexer implements Closeable {
*/
private static final char DISABLED = '\ufffe';
private final char delimiter;
private final char[] delimiter;
private final char escape;
private final char quoteChar;
private final char commentStart;
@ -62,7 +62,7 @@ final class Lexer implements Closeable {
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
this.reader = reader;
this.delimiter = format.getDelimiter();
this.delimiter = format.getDelimiterString().toCharArray();
this.escape = mapNullToDisabled(format.getEscapeCharacter());
this.quoteChar = mapNullToDisabled(format.getQuoteCharacter());
this.commentStart = mapNullToDisabled(format.getCommentMarker());
@ -111,26 +111,72 @@ final class Lexer implements Closeable {
return ch == commentStart;
}
boolean isDelimiter(final int ch) {
return ch == delimiter;
/**
 * Determines whether the current character, together with the characters that follow it in the reader, constitutes the delimiter. The following
 * characters are inspected through {@link ExtendedBufferedReader#lookAhead(int)}; on a match they are consumed from the reader.
 *
 * @param ch
 *             the current character.
 * @return true if the current and next characters constitute a delimiter.
 * @throws IOException If an I/O error occurs.
 */
boolean isDelimiter(final int ch) throws IOException {
    if (ch != delimiter[0]) {
        return false;
    }
    if (delimiter.length == 1) {
        // Single-character delimiter: nothing further to match or consume, so skip the
        // buffer allocation and the mark/reset round trip on this hot path.
        return true;
    }
    final int len = delimiter.length - 1;
    final char[] buf = reader.lookAhead(len);
    for (int i = 0; i < len; i++) {
        if (buf[i] != delimiter[i + 1]) {
            return false;
        }
    }
    // The look-ahead matched: consume the rest of the delimiter.
    final int count = reader.read(buf, 0, len);
    return count != END_OF_STREAM;
}
/**
* @return true if the given character indicates end of file
* Tests if the given character indicates end of file.
*
* @return true if the given character indicates end of file.
*/
boolean isEndOfFile(final int ch) {
return ch == END_OF_STREAM;
}
/**
* Tests if the given character is the escape character.
*
* @param ch the character to test.
* @return true if the given character is the escape character.
*/
boolean isEscape(final int ch) {
return ch == escape;
}
/**
 * Tests if the next characters constitute an escaped delimiter, inspecting them through {@link ExtendedBufferedReader#lookAhead(int)}. On a match the
 * inspected characters are consumed from the reader.
 * <p>
 * For example, for delimiter "[|]" and escape '!', returns true if the next characters constitute "![!|!]".
 * </p>
 *
 * @return true if the next characters constitute an escaped delimiter.
 * @throws IOException If an I/O error occurs.
 */
boolean isEscapeDelimiter() throws IOException {
    // First delimiter char, then an (escape, delimiter char) pair for each remaining delimiter char.
    final int lookAheadLength = 2 * delimiter.length - 1;
    final char[] ahead = reader.lookAhead(lookAheadLength);
    boolean matches = ahead[0] == delimiter[0];
    for (int i = 1; matches && i < delimiter.length; i++) {
        matches = ahead[2 * i - 1] == escape && ahead[2 * i] == delimiter[i];
    }
    if (!matches) {
        return false;
    }
    // The look-ahead matched: consume the escaped delimiter.
    return reader.read(ahead, 0, lookAheadLength) != END_OF_STREAM;
}
private boolean isMetaChar(final int ch) {
return ch == delimiter ||
ch == escape ||
ch == quoteChar ||
ch == commentStart;
return ch == escape || ch == quoteChar || ch == commentStart;
}
boolean isQuoteChar(final int ch) {
@ -138,7 +184,7 @@ final class Lexer implements Closeable {
}
/**
* Checks if the current character represents the start of a line: a CR, LF or is at the start of the file.
* Tests if the current character represents the start of a line: a CR, LF or is at the start of the file.
*
* @param ch the character to check
* @return true if the character is at the start of a line.
@ -148,9 +194,12 @@ final class Lexer implements Closeable {
}
/**
* @return true if the given char is a whitespace character
* Tests if the given char is a whitespace character.
*
* @return true if the given char is a whitespace character.
* @throws IOException If an I/O error occurs.
*/
boolean isWhitespace(final int ch) {
boolean isWhitespace(final int ch) throws IOException {
return !isDelimiter(ch) && Character.isWhitespace((char) ch);
}
@ -166,9 +215,8 @@ final class Lexer implements Closeable {
*
* @param token
* an existing Token object to reuse. The caller is responsible to initialize the Token.
* @return the next token found
* @throws java.io.IOException
* on stream access error
* @return the next token found.
* @throws java.io.IOException on stream access error.
*/
Token nextToken(final Token token) throws IOException {
@ -256,10 +304,11 @@ final class Lexer implements Closeable {
/**
* Parses an encapsulated token.
* <p/>
* <p>
* Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included
* in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
* an encapsulated token are ignored. The token is finished when one of the following conditions become true:
* </p>
* <ul>
* <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:</li>
* <ul>
@ -282,11 +331,15 @@ final class Lexer implements Closeable {
c = reader.read();
if (isEscape(c)) {
final int unescaped = readEscape();
if (unescaped == END_OF_STREAM) { // unexpected char after escape
token.content.append((char) c).append((char) reader.getLastChar());
if (isEscapeDelimiter()) {
token.content.append(delimiter);
} else {
token.content.append((char) unescaped);
final int unescaped = readEscape();
if (unescaped == END_OF_STREAM) { // unexpected char after escape
token.content.append((char) c).append((char) reader.getLastChar());
} else {
token.content.append((char) unescaped);
}
}
} else if (isQuoteChar(c)) {
if (isQuoteChar(reader.lookAhead())) {
@ -330,9 +383,10 @@ final class Lexer implements Closeable {
/**
* Parses a simple token.
* <p/>
* <p>
* Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped
* delimiters (as \, or \;). The token is finished when one of the following conditions become true:
* </p>
* <ul>
* <li>end of line has been reached (EORECORD)</li>
* <li>end of stream has been reached (EOF)</li>
@ -364,11 +418,15 @@ final class Lexer implements Closeable {
break;
}
if (isEscape(ch)) {
final int unescaped = readEscape();
if (unescaped == END_OF_STREAM) { // unexpected char after escape
token.content.append((char) ch).append((char) reader.getLastChar());
if (isEscapeDelimiter()) {
token.content.append(delimiter);
} else {
token.content.append((char) unescaped);
final int unescaped = readEscape();
if (unescaped == END_OF_STREAM) { // unexpected char after escape
token.content.append((char) ch).append((char) reader.getLastChar());
} else {
token.content.append((char) unescaped);
}
}
ch = reader.read(); // continue
} else {

View File

@ -15,7 +15,7 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
<ruleset name="commons-rng-customized"
<ruleset name="commons-csv-customized"
xmlns="http://pmd.sourceforge.net/ruleset/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://pmd.sourceforge.net/ruleset/2.0.0 http://pmd.sourceforge.net/ruleset_2_0_0.xsd">
@ -35,7 +35,8 @@
<rule ref="category/java/codestyle.xml/DuplicateImports"/>
<rule ref="category/java/codestyle.xml/ExtendsObject"/>
<rule ref="category/java/codestyle.xml/ForLoopShouldBeWhileLoop"/>
<rule ref="category/java/codestyle.xml/TooManyStaticImports"/>
<!-- See custom rules -->
<!-- <rule ref="category/java/codestyle.xml/TooManyStaticImports"/> -->
<rule ref="category/java/codestyle.xml/UnnecessaryFullyQualifiedName"/>
<rule ref="category/java/codestyle.xml/UnnecessaryModifier"/>
<rule ref="category/java/codestyle.xml/UnnecessaryReturn"/>
@ -82,9 +83,7 @@
<rule ref="category/java/codestyle.xml/TooManyStaticImports">
<properties>
<property name="violationSuppressXPath"
value="//ClassOrInterfaceDeclaration[.[typeIs('org.apache.commons.csv.CSVFormat')]]" />
<property name="violationSuppressXPath"
value="//ClassOrInterfaceDeclaration[.[typeIs('org.apache.commons.csv.Lexer')]]" />
value="//ClassOrInterfaceDeclaration[contains(@Name, 'org.apache.commons.csv.CSVFormat')] or //ClassOrInterfaceDeclaration[contains(@SimpleName, 'org.apache.commons.csv.Lexer')]" />
</properties>
</rule>

View File

@ -89,6 +89,84 @@ public class CSVParserTest {
}
}
/**
 * Parses one record whose first field is wrapped in single quotes and contains commas; the quoted commas
 * must remain part of the field value rather than splitting it.
 */
@Test
public void testParseWithDelimiterWithQuote() throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withQuote('\'');
    final StringReader input = new StringReader("'a,b,c',xyz");
    try (CSVParser parser = format.parse(input)) {
        final CSVRecord parsed = parser.nextRecord();
        // Field 0 keeps its embedded commas, field 1 is the plain trailing value.
        assertEquals("a,b,c", parsed.get(0));
        assertEquals("xyz", parsed.get(1));
    }
}
/**
 * Parses two records using the multi-character delimiter {@code [|]} and a single-quote quote character.
 * The first record quotes a field that contains the delimiter text; the second contains bracketed text
 * ({@code [abc]}) that only partially resembles the delimiter.
 */
@Test
public void testParseWithDelimiterStringWithQuote() throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').build();
    final StringReader input = new StringReader("'a[|]b[|]c'[|]xyz\r\nabc[abc][|]xyz");
    try (CSVParser parser = format.parse(input)) {
        // Record 1: the quoted field keeps the delimiter text verbatim.
        final CSVRecord quotedRecord = parser.nextRecord();
        assertEquals("a[|]b[|]c", quotedRecord.get(0));
        assertEquals("xyz", quotedRecord.get(1));

        // Record 2: "[abc]" must not be mistaken for the delimiter.
        final CSVRecord plainRecord = parser.nextRecord();
        assertEquals("abc[abc]", plainRecord.get(0));
        assertEquals("xyz", plainRecord.get(1));
    }
}
/**
 * Parses one record with {@code '!'} configured as the escape character; commas preceded by the escape
 * character must end up inside the field value.
 */
@Test
public void testParseWithDelimiterWithEscape() throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withEscape('!');
    final StringReader input = new StringReader("a!,b!,c,xyz");
    try (CSVParser parser = format.parse(input)) {
        final CSVRecord parsed = parser.nextRecord();
        // The two escaped commas are kept; only the unescaped comma splits the record.
        assertEquals("a,b,c", parsed.get(0));
        assertEquals("xyz", parsed.get(1));
    }
}
/**
 * Parses two records using the multi-character delimiter {@code [|]} with {@code '!'} as the escape
 * character, and checks the exact field values produced for a fully escaped delimiter
 * ({@code ![!|!]}) and for a delimiter with only its first character escaped ({@code ![|]}).
 */
@Test
public void testParseWithDelimiterStringWithEscape() throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setEscape('!').build();
    final StringReader input = new StringReader("a![!|!]b![|]c[|]xyz\r\nabc[abc][|]xyz");
    try (CSVParser parser = format.parse(input)) {
        // Record 1: expected values pin the current handling of both escape forms.
        final CSVRecord escapedRecord = parser.nextRecord();
        assertEquals("a[|]b![|]c", escapedRecord.get(0));
        assertEquals("xyz", escapedRecord.get(1));

        // Record 2: "[abc]" must not be mistaken for the delimiter.
        final CSVRecord plainRecord = parser.nextRecord();
        assertEquals("abc[abc]", plainRecord.get(0));
        assertEquals("xyz", plainRecord.get(1));
    }
}
/**
 * Parses one record that combines a single-quote quote character with {@code '?'} as the escape
 * character inside the quoted field, and checks the resulting field values.
 */
@Test
public void testParseWithQuoteWithEscape() throws IOException {
    final CSVFormat format = CSVFormat.DEFAULT.withQuote('\'').withEscape('?');
    final StringReader input = new StringReader("'a?,b?,c?d',xyz");
    try (CSVParser parser = format.parse(input)) {
        final CSVRecord parsed = parser.nextRecord();
        // "?," yields a comma, while "?d" is expected to come through unchanged.
        assertEquals("a,b,c?d", parsed.get(0));
        assertEquals("xyz", parsed.get(1));
    }
}
/**
 * Verifies that reading the first record of each malformed, single-quoted input fails with an
 * {@link IOException}. Every parser is opened in a try-with-resources block so it is closed even though
 * reading is expected to throw, and the failing input is included in the assertion message.
 */
@Test
public void testParseWithQuoteThrowsException() {
    final CSVFormat csvFormat = CSVFormat.DEFAULT.withQuote('\'');
    final String[] malformedInputs = {
        "'a,b,c','",
        "'a,b,c'abc,xyz",
        "'abc'a,b,c',xyz"
    };
    for (final String malformed : malformedInputs) {
        assertThrows(IOException.class, () -> {
            try (CSVParser csvParser = csvFormat.parse(new StringReader(malformed))) {
                csvParser.nextRecord();
            }
        }, "Expected IOException for input: " + malformed);
    }
}
/**
 * Parses an input that consists only of the comment marker {@code '#'} and expects no record to be
 * returned. The parser is managed by try-with-resources so it is always closed.
 */
@Test
public void testNotValueCSV() throws IOException {
    final String source = "#";
    final CSVFormat csvFormat = CSVFormat.DEFAULT.withCommentMarker('#');
    try (CSVParser csvParser = csvFormat.parse(new StringReader(source))) {
        final CSVRecord csvRecord = csvParser.nextRecord();
        assertNull(csvRecord);
    }
}
@Test
public void testBackslashEscaping() throws IOException {

View File

@ -17,6 +17,7 @@
package org.apache.commons.csv;
import static org.apache.commons.csv.Constants.BACKSLASH;
import static org.apache.commons.csv.Constants.CR;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
@ -67,7 +68,6 @@ import org.junit.jupiter.api.Test;
*/
public class CSVPrinterTest {
private static final char BACKSLASH_CH = '\\';
private static final char DQUOTE_CHAR = '"';
private static final char EURO_CH = '\u20AC';
private static final int ITERATIONS_FOR_RANDOM_TEST = 50000;
@ -161,11 +161,14 @@ public class CSVPrinterTest {
private CSVPrinter printWithHeaderComments(final StringWriter sw, final Date now, final CSVFormat baseFormat)
throws IOException {
CSVFormat format = baseFormat;
// Use withHeaderComments first to test CSV-145
format = format.withHeaderComments("Generated by Apache Commons CSV 1.1", now);
format = format.withCommentMarker('#');
format = format.withHeader("Col1", "Col2");
// @formatter:off
CSVFormat format = baseFormat.builder()
.setHeaderComments("Generated by Apache Commons CSV 1.1", now)
.setCommentMarker('#')
.setHeader("Col1", "Col2")
.build();
// @formatter:on
final CSVPrinter csvPrinter = format.print(sw);
csvPrinter.printRecord("A", "B");
csvPrinter.printRecord("C", "D");
@ -209,7 +212,7 @@ public class CSVPrinterTest {
ch = '\'';
break;
case 8:
ch = BACKSLASH_CH;
ch = BACKSLASH;
break;
default:
ch = (char) r.nextInt(300);
@ -356,6 +359,28 @@ public class CSVPrinterTest {
}
}
/**
 * Prints two values with the multi-character delimiter {@code [|]} and a single-quote quote character;
 * the value that contains the delimiter text is expected to be written wrapped in quotes.
 */
@Test
public void testDelimeterStringQuoted() throws IOException {
    final StringWriter out = new StringWriter();
    final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").setQuote('\'').build();
    try (final CSVPrinter csvPrinter = new CSVPrinter(out, format)) {
        csvPrinter.print("a[|]b[|]c");
        csvPrinter.print("xyz");
        // Checked before the printer is closed, exactly as the values were written.
        final String written = out.toString();
        assertEquals("'a[|]b[|]c'[|]xyz", written);
    }
}
/**
 * Prints three values with the multi-character delimiter {@code [|]}, escape character {@code '!'} and
 * {@link QuoteMode#NONE}; delimiter text inside a value is expected to be written with every delimiter
 * character escaped, while bracketed text that is not the delimiter is written unchanged.
 */
@Test
public void testDelimeterStringQuoteNone() throws IOException {
    final StringWriter out = new StringWriter();
    final CSVFormat noQuoteFormat = CSVFormat.DEFAULT.builder()
        .setDelimiter("[|]")
        .setEscape('!')
        .setQuoteMode(QuoteMode.NONE)
        .build();
    try (final CSVPrinter csvPrinter = new CSVPrinter(out, noQuoteFormat)) {
        csvPrinter.print("a[|]b[|]c");
        csvPrinter.print("xyz");
        csvPrinter.print("a[xy]bc[]");
        final String written = out.toString();
        assertEquals("a![!|!]b![!|!]c[|]xyz[|]a[xy]bc[]", written);
    }
}
@Test
public void testDelimiterEscaped() throws IOException {
final StringWriter sw = new StringWriter();
@ -376,6 +401,16 @@ public class CSVPrinterTest {
}
}
/**
 * Prints two values with the multi-character delimiter {@code |||}, escape character {@code '!'} and no
 * quote character; each delimiter character occurring inside a value is expected to be escaped.
 */
@Test
public void testDelimiterStringEscaped() throws IOException {
    final StringWriter out = new StringWriter();
    final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("|||").setEscape('!').setQuote(null).build();
    try (final CSVPrinter csvPrinter = new CSVPrinter(out, format)) {
        csvPrinter.print("a|||b|||c");
        csvPrinter.print("xyz");
        final String written = out.toString();
        assertEquals("a!|!|!|b!|!|!|c|||xyz", written);
    }
}
@Test
public void testDisabledComment() throws IOException {
final StringWriter sw = new StringWriter();
@ -688,7 +723,7 @@ public class CSVPrinterTest {
@Test
@Disabled
public void testJira135_part1() throws IOException {
final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH_CH);
final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH);
final StringWriter sw = new StringWriter();
final List<String> list = new LinkedList<>();
try (final CSVPrinter printer = new CSVPrinter(sw, format)) {
@ -704,7 +739,7 @@ public class CSVPrinterTest {
@Test
@Disabled
public void testJira135_part2() throws IOException {
final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH_CH);
final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH);
final StringWriter sw = new StringWriter();
final List<String> list = new LinkedList<>();
try (final CSVPrinter printer = new CSVPrinter(sw, format)) {
@ -720,7 +755,7 @@ public class CSVPrinterTest {
@Test
@Disabled
public void testJira135_part3() throws IOException {
final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH_CH);
final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH);
final StringWriter sw = new StringWriter();
final List<String> list = new LinkedList<>();
try (final CSVPrinter printer = new CSVPrinter(sw, format)) {
@ -736,7 +771,7 @@ public class CSVPrinterTest {
@Test
@Disabled
public void testJira135All() throws IOException {
final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH_CH);
final CSVFormat format = CSVFormat.DEFAULT.withRecordSeparator('\n').withQuote(DQUOTE_CHAR).withEscape(BACKSLASH);
final StringWriter sw = new StringWriter();
final List<String> list = new LinkedList<>();
try (final CSVPrinter printer = new CSVPrinter(sw, format)) {

View File

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv.issues;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.junit.jupiter.api.Test;
/**
 * Tests reading and writing CSV data with a delimiter that is longer than one character
 * (the class name refers to issue CSV-206).
 */
public class JiraCsv206Test {

    /**
     * Reads two records separated by the delimiter {@code [|]}, then prints the second record using the
     * delimiter {@code [I]} together with a header row and a header comment, and compares the complete
     * printed output with the expected text.
     */
    @Test
    public void testJiraCsv206MultipleCharacterDelimiter() throws IOException {
        // Read with multiple character delimiter
        final String source = "FirstName[|]LastName[|]Address\r\nJohn[|]Smith[|]123 Main St.";
        final StringReader reader = new StringReader(source);
        final CSVFormat csvFormat = CSVFormat.DEFAULT.builder().setDelimiter("[|]").build();
        // Holds the last record read so it can be written back out after the parser is closed.
        CSVRecord record = null;
        try (final CSVParser csvParser = new CSVParser(reader, csvFormat)) {
            final Iterator<CSVRecord> iterator = csvParser.iterator();
            record = iterator.next();
            assertEquals("FirstName", record.get(0));
            assertEquals("LastName", record.get(1));
            assertEquals("Address", record.get(2));
            record = iterator.next();
            assertEquals("John", record.get(0));
            assertEquals("Smith", record.get(1));
            assertEquals("123 Main St.", record.get(2));
        }
        // Write with multiple character delimiter
        final String outString = "# Change delimiter to [I]\r\n" + "first name[I]last name[I]address\r\n" + "John[I]Smith[I]123 Main St.";
        final String comment = "Change delimiter to [I]";
        // @formatter:off
        final CSVFormat format = CSVFormat.EXCEL.builder()
                .setDelimiter("[I]").setHeader("first name", "last name", "address")
                .setCommentMarker('#')
                .setHeaderComments(comment).build();
        // @formatter:on
        final StringBuilder out = new StringBuilder();
        try (final CSVPrinter printer = format.print(out)) {
            printer.print(record.get(0));
            printer.print(record.get(1));
            printer.print(record.get(2));
        }
        final String s = out.toString();
        assertEquals(outString, s);
    }
}