Remove DISABLED character hack.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1397783 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dc7a71979d
commit
50e2719bb6
|
@ -18,6 +18,7 @@
|
||||||
package org.apache.commons.csv;
|
package org.apache.commons.csv;
|
||||||
|
|
||||||
import static org.apache.commons.csv.Constants.COMMA;
|
import static org.apache.commons.csv.Constants.COMMA;
|
||||||
|
import static org.apache.commons.csv.Constants.CR;
|
||||||
import static org.apache.commons.csv.Constants.CRLF;
|
import static org.apache.commons.csv.Constants.CRLF;
|
||||||
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE;
|
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE;
|
||||||
import static org.apache.commons.csv.Constants.ESCAPE;
|
import static org.apache.commons.csv.Constants.ESCAPE;
|
||||||
|
@ -38,30 +39,19 @@ public class CSVFormat implements Serializable {
|
||||||
|
|
||||||
private static final long serialVersionUID = 1L;
|
private static final long serialVersionUID = 1L;
|
||||||
|
|
||||||
private final char delimiter;
|
private final Character delimiter;
|
||||||
private final char encapsulator;
|
private final Character encapsulator;
|
||||||
private final char commentStart;
|
private final Character commentStart;
|
||||||
private final char escape;
|
private final Character escape;
|
||||||
private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
|
private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
|
||||||
private final boolean ignoreEmptyLines;
|
private final boolean ignoreEmptyLines;
|
||||||
private final String lineSeparator; // for outputs
|
private final String lineSeparator; // for outputs
|
||||||
private final String[] header;
|
private final String[] header;
|
||||||
|
|
||||||
private final boolean isEscaping;
|
|
||||||
private final boolean isCommentingEnabled;
|
|
||||||
private final boolean isEncapsulating;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constant char to be used for disabling comments, escapes and encapsulation. The value -2 is used because it
|
|
||||||
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two chars
|
|
||||||
* (using surrogates) and thus there should never be a collision with a real text char.
|
|
||||||
*/
|
|
||||||
static final char DISABLED = '\ufffe';
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Starting format with no settings defined; used for creating other formats from scratch.
|
* Starting format with no settings defined; used for creating other formats from scratch.
|
||||||
*/
|
*/
|
||||||
static final CSVFormat PRISTINE = new CSVFormat(DISABLED, DISABLED, DISABLED, DISABLED, false, false, null, null);
|
static final CSVFormat PRISTINE = new CSVFormat(null, null, null, null, false, false, null, null);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Standard comma separated format, as for {@link #RFC4180} but allowing blank lines.
|
* Standard comma separated format, as for {@link #RFC4180} but allowing blank lines.
|
||||||
|
@ -73,8 +63,8 @@ public class CSVFormat implements Serializable {
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
public static final CSVFormat DEFAULT =
|
public static final CSVFormat DEFAULT =
|
||||||
PRISTINE.
|
PRISTINE
|
||||||
withDelimiter(COMMA)
|
.withDelimiter(COMMA)
|
||||||
.withEncapsulator(DOUBLE_QUOTE)
|
.withEncapsulator(DOUBLE_QUOTE)
|
||||||
.withIgnoreEmptyLines(true)
|
.withIgnoreEmptyLines(true)
|
||||||
.withLineSeparator(CRLF);
|
.withLineSeparator(CRLF);
|
||||||
|
@ -89,8 +79,8 @@ public class CSVFormat implements Serializable {
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
public static final CSVFormat RFC4180 =
|
public static final CSVFormat RFC4180 =
|
||||||
PRISTINE.
|
PRISTINE
|
||||||
withDelimiter(COMMA)
|
.withDelimiter(COMMA)
|
||||||
.withEncapsulator(DOUBLE_QUOTE)
|
.withEncapsulator(DOUBLE_QUOTE)
|
||||||
.withLineSeparator(CRLF);
|
.withLineSeparator(CRLF);
|
||||||
|
|
||||||
|
@ -127,7 +117,7 @@ public class CSVFormat implements Serializable {
|
||||||
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
|
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
|
||||||
* http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
|
* http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
|
||||||
*/
|
*/
|
||||||
public static final CSVFormat MYSQL =
|
public static final CSVFormat MYSQL =
|
||||||
PRISTINE
|
PRISTINE
|
||||||
.withDelimiter(TAB)
|
.withDelimiter(TAB)
|
||||||
.withEscape(ESCAPE)
|
.withEscape(ESCAPE)
|
||||||
|
@ -153,7 +143,7 @@ public class CSVFormat implements Serializable {
|
||||||
* @param header
|
* @param header
|
||||||
* the header
|
* the header
|
||||||
*/
|
*/
|
||||||
CSVFormat(final char delimiter, final char encapsulator, final char commentStart, final char escape, final boolean surroundingSpacesIgnored,
|
CSVFormat(final Character delimiter, final Character encapsulator, final Character commentStart, final Character escape, final boolean surroundingSpacesIgnored,
|
||||||
final boolean emptyLinesIgnored, final String lineSeparator, final String[] header) {
|
final boolean emptyLinesIgnored, final String lineSeparator, final String[] header) {
|
||||||
this.delimiter = delimiter;
|
this.delimiter = delimiter;
|
||||||
this.encapsulator = encapsulator;
|
this.encapsulator = encapsulator;
|
||||||
|
@ -163,9 +153,6 @@ public class CSVFormat implements Serializable {
|
||||||
this.ignoreEmptyLines = emptyLinesIgnored;
|
this.ignoreEmptyLines = emptyLinesIgnored;
|
||||||
this.lineSeparator = lineSeparator;
|
this.lineSeparator = lineSeparator;
|
||||||
this.header = header;
|
this.header = header;
|
||||||
this.isEncapsulating = encapsulator != DISABLED;
|
|
||||||
this.isCommentingEnabled = commentStart != DISABLED;
|
|
||||||
this.isEscaping = escape != DISABLED;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -176,8 +163,8 @@ public class CSVFormat implements Serializable {
|
||||||
*
|
*
|
||||||
* @return true if <code>c</code> is a line break character
|
* @return true if <code>c</code> is a line break character
|
||||||
*/
|
*/
|
||||||
private static boolean isLineBreak(final char c) {
|
private static boolean isLineBreak(final Character c) {
|
||||||
return c == '\n' || c == '\r';
|
return c != null && (c == LF || c == CR);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -199,12 +186,12 @@ public class CSVFormat implements Serializable {
|
||||||
commentStart + "\")");
|
commentStart + "\")");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (encapsulator != DISABLED && encapsulator == commentStart) {
|
if (encapsulator != null && encapsulator == commentStart) {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")");
|
"The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (escape != DISABLED && escape == commentStart) {
|
if (escape != null && escape == commentStart) {
|
||||||
throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\"" +
|
throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\"" +
|
||||||
commentStart + "\")");
|
commentStart + "\")");
|
||||||
}
|
}
|
||||||
|
@ -229,6 +216,19 @@ public class CSVFormat implements Serializable {
|
||||||
* thrown if the specified character is a line break
|
* thrown if the specified character is a line break
|
||||||
*/
|
*/
|
||||||
public CSVFormat withDelimiter(final char delimiter) {
|
public CSVFormat withDelimiter(final char delimiter) {
|
||||||
|
return withDelimiter(Character.valueOf(delimiter));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a copy of this format using the specified delimiter character.
|
||||||
|
*
|
||||||
|
* @param delimiter
|
||||||
|
* the delimiter character
|
||||||
|
* @return A copy of this format using the specified delimiter character
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the specified character is a line break
|
||||||
|
*/
|
||||||
|
public CSVFormat withDelimiter(final Character delimiter) {
|
||||||
if (isLineBreak(delimiter)) {
|
if (isLineBreak(delimiter)) {
|
||||||
throw new IllegalArgumentException("The delimiter cannot be a line break");
|
throw new IllegalArgumentException("The delimiter cannot be a line break");
|
||||||
}
|
}
|
||||||
|
@ -241,7 +241,7 @@ public class CSVFormat implements Serializable {
|
||||||
*
|
*
|
||||||
* @return the encapsulator character
|
* @return the encapsulator character
|
||||||
*/
|
*/
|
||||||
public char getEncapsulator() {
|
public Character getEncapsulator() {
|
||||||
return encapsulator;
|
return encapsulator;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -255,6 +255,19 @@ public class CSVFormat implements Serializable {
|
||||||
* thrown if the specified character is a line break
|
* thrown if the specified character is a line break
|
||||||
*/
|
*/
|
||||||
public CSVFormat withEncapsulator(final char encapsulator) {
|
public CSVFormat withEncapsulator(final char encapsulator) {
|
||||||
|
return withEncapsulator(Character.valueOf(encapsulator));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a copy of this format using the specified encapsulator character.
|
||||||
|
*
|
||||||
|
* @param encapsulator
|
||||||
|
* the encapsulator character
|
||||||
|
* @return A copy of this format using the specified encapsulator character
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the specified character is a line break
|
||||||
|
*/
|
||||||
|
public CSVFormat withEncapsulator(final Character encapsulator) {
|
||||||
if (isLineBreak(encapsulator)) {
|
if (isLineBreak(encapsulator)) {
|
||||||
throw new IllegalArgumentException("The encapsulator cannot be a line break");
|
throw new IllegalArgumentException("The encapsulator cannot be a line break");
|
||||||
}
|
}
|
||||||
|
@ -268,7 +281,7 @@ public class CSVFormat implements Serializable {
|
||||||
* @return {@code true} if an encapsulator is defined
|
* @return {@code true} if an encapsulator is defined
|
||||||
*/
|
*/
|
||||||
public boolean isEncapsulating() {
|
public boolean isEncapsulating() {
|
||||||
return isEncapsulating;
|
return encapsulator != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -276,7 +289,7 @@ public class CSVFormat implements Serializable {
|
||||||
*
|
*
|
||||||
* @return the comment start marker.
|
* @return the comment start marker.
|
||||||
*/
|
*/
|
||||||
public char getCommentStart() {
|
public Character getCommentStart() {
|
||||||
return commentStart;
|
return commentStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,6 +305,21 @@ public class CSVFormat implements Serializable {
|
||||||
* thrown if the specified character is a line break
|
* thrown if the specified character is a line break
|
||||||
*/
|
*/
|
||||||
public CSVFormat withCommentStart(final char commentStart) {
|
public CSVFormat withCommentStart(final char commentStart) {
|
||||||
|
return withCommentStart(Character.valueOf(commentStart));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a copy of this format using the specified character as the comment start marker.
|
||||||
|
*
|
||||||
|
* Note that the comment introducer character is only recognised at the start of a line.
|
||||||
|
*
|
||||||
|
* @param commentStart
|
||||||
|
* the comment start marker
|
||||||
|
* @return A copy of this format using the specified character as the comment start marker
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the specified character is a line break
|
||||||
|
*/
|
||||||
|
public CSVFormat withCommentStart(final Character commentStart) {
|
||||||
if (isLineBreak(commentStart)) {
|
if (isLineBreak(commentStart)) {
|
||||||
throw new IllegalArgumentException("The comment start character cannot be a line break");
|
throw new IllegalArgumentException("The comment start character cannot be a line break");
|
||||||
}
|
}
|
||||||
|
@ -307,7 +335,7 @@ public class CSVFormat implements Serializable {
|
||||||
* @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise
|
* @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise
|
||||||
*/
|
*/
|
||||||
public boolean isCommentingEnabled() {
|
public boolean isCommentingEnabled() {
|
||||||
return isCommentingEnabled;
|
return commentStart != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -315,7 +343,7 @@ public class CSVFormat implements Serializable {
|
||||||
*
|
*
|
||||||
* @return the escape character
|
* @return the escape character
|
||||||
*/
|
*/
|
||||||
public char getEscape() {
|
public Character getEscape() {
|
||||||
return escape;
|
return escape;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -329,6 +357,19 @@ public class CSVFormat implements Serializable {
|
||||||
* thrown if the specified character is a line break
|
* thrown if the specified character is a line break
|
||||||
*/
|
*/
|
||||||
public CSVFormat withEscape(final char escape) {
|
public CSVFormat withEscape(final char escape) {
|
||||||
|
return withEscape(Character.valueOf(escape));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a copy of this format using the specified escape character.
|
||||||
|
*
|
||||||
|
* @param escape
|
||||||
|
* the escape character
|
||||||
|
* @return A copy of this format using the specified escape character
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* thrown if the specified character is a line break
|
||||||
|
*/
|
||||||
|
public CSVFormat withEscape(final Character escape) {
|
||||||
if (isLineBreak(escape)) {
|
if (isLineBreak(escape)) {
|
||||||
throw new IllegalArgumentException("The escape character cannot be a line break");
|
throw new IllegalArgumentException("The escape character cannot be a line break");
|
||||||
}
|
}
|
||||||
|
@ -342,7 +383,7 @@ public class CSVFormat implements Serializable {
|
||||||
* @return {@code true} if escapes are processed
|
* @return {@code true} if escapes are processed
|
||||||
*/
|
*/
|
||||||
public boolean isEscaping() {
|
public boolean isEscaping() {
|
||||||
return isEscaping;
|
return escape != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -32,14 +32,10 @@ import java.io.IOException;
|
||||||
*/
|
*/
|
||||||
abstract class Lexer {
|
abstract class Lexer {
|
||||||
|
|
||||||
private final boolean isEncapsulating;
|
private final Character delimiter;
|
||||||
private final boolean isEscaping;
|
private final Character escape;
|
||||||
private final boolean isCommentEnabled;
|
private final Character encapsulator;
|
||||||
|
private final Character commmentStart;
|
||||||
private final char delimiter;
|
|
||||||
private final char escape;
|
|
||||||
private final char encapsulator;
|
|
||||||
private final char commmentStart;
|
|
||||||
|
|
||||||
final boolean surroundingSpacesIgnored;
|
final boolean surroundingSpacesIgnored;
|
||||||
final boolean emptyLinesIgnored;
|
final boolean emptyLinesIgnored;
|
||||||
|
@ -52,9 +48,6 @@ abstract class Lexer {
|
||||||
Lexer(final CSVFormat format, final ExtendedBufferedReader in) {
|
Lexer(final CSVFormat format, final ExtendedBufferedReader in) {
|
||||||
this.format = format;
|
this.format = format;
|
||||||
this.in = in;
|
this.in = in;
|
||||||
this.isEncapsulating = format.isEncapsulating();
|
|
||||||
this.isEscaping = format.isEscaping();
|
|
||||||
this.isCommentEnabled = format.isCommentingEnabled();
|
|
||||||
this.delimiter = format.getDelimiter();
|
this.delimiter = format.getDelimiter();
|
||||||
this.escape = format.getEscape();
|
this.escape = format.getEscape();
|
||||||
this.encapsulator = format.getEncapsulator();
|
this.encapsulator = format.getEncapsulator();
|
||||||
|
@ -144,14 +137,14 @@ abstract class Lexer {
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isEscape(final int c) {
|
boolean isEscape(final int c) {
|
||||||
return isEscaping && c == escape;
|
return escape != null && c == escape;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isEncapsulator(final int c) {
|
boolean isEncapsulator(final int c) {
|
||||||
return isEncapsulating && c == encapsulator;
|
return encapsulator != null && c == encapsulator;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isCommentStart(final int c) {
|
boolean isCommentStart(final int c) {
|
||||||
return isCommentEnabled && c == commmentStart;
|
return commmentStart != null && c == commmentStart;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,9 +46,9 @@ public class CSVFormatTest {
|
||||||
format.withIgnoreEmptyLines(false);
|
format.withIgnoreEmptyLines(false);
|
||||||
|
|
||||||
assertEquals('!', format.getDelimiter());
|
assertEquals('!', format.getDelimiter());
|
||||||
assertEquals('!', format.getEncapsulator());
|
assertEquals('!', format.getEncapsulator().charValue());
|
||||||
assertEquals('!', format.getCommentStart());
|
assertEquals('!', format.getCommentStart().charValue());
|
||||||
assertEquals('!', format.getEscape());
|
assertEquals('!', format.getEscape().charValue());
|
||||||
assertEquals(CRLF, format.getLineSeparator());
|
assertEquals(CRLF, format.getLineSeparator());
|
||||||
|
|
||||||
assertTrue(format.getIgnoreSurroundingSpaces());
|
assertTrue(format.getIgnoreSurroundingSpaces());
|
||||||
|
@ -60,10 +60,10 @@ public class CSVFormatTest {
|
||||||
final CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, CRLF, null);
|
final CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, CRLF, null);
|
||||||
|
|
||||||
assertEquals('?', format.withDelimiter('?').getDelimiter());
|
assertEquals('?', format.withDelimiter('?').getDelimiter());
|
||||||
assertEquals('?', format.withEncapsulator('?').getEncapsulator());
|
assertEquals('?', format.withEncapsulator('?').getEncapsulator().charValue());
|
||||||
assertEquals('?', format.withCommentStart('?').getCommentStart());
|
assertEquals('?', format.withCommentStart('?').getCommentStart().charValue());
|
||||||
assertEquals("?", format.withLineSeparator("?").getLineSeparator());
|
assertEquals("?", format.withLineSeparator("?").getLineSeparator());
|
||||||
assertEquals('?', format.withEscape('?').getEscape());
|
assertEquals('?', format.withEscape('?').getEscape().charValue());
|
||||||
|
|
||||||
assertFalse(format.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces());
|
assertFalse(format.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces());
|
||||||
assertFalse(format.withIgnoreEmptyLines(false).getIgnoreEmptyLines());
|
assertFalse(format.withIgnoreEmptyLines(false).getIgnoreEmptyLines());
|
||||||
|
@ -131,7 +131,7 @@ public class CSVFormatTest {
|
||||||
// expected
|
// expected
|
||||||
}
|
}
|
||||||
|
|
||||||
format.withEncapsulator(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate();
|
format.withEncapsulator(null).withCommentStart(null).validate();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
format.withEscape('!').withCommentStart('!').validate();
|
format.withEscape('!').withCommentStart('!').validate();
|
||||||
|
@ -140,7 +140,7 @@ public class CSVFormatTest {
|
||||||
// expected
|
// expected
|
||||||
}
|
}
|
||||||
|
|
||||||
format.withEscape(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate();
|
format.withEscape(null).withCommentStart(null).validate();
|
||||||
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
Loading…
Reference in New Issue