Remove DISABLED character hack.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1397783 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Gary D. Gregory 2012-10-13 06:27:52 +00:00
parent dc7a71979d
commit 50e2719bb6
3 changed files with 91 additions and 57 deletions

View File

@ -18,6 +18,7 @@
package org.apache.commons.csv; package org.apache.commons.csv;
import static org.apache.commons.csv.Constants.COMMA; import static org.apache.commons.csv.Constants.COMMA;
import static org.apache.commons.csv.Constants.CR;
import static org.apache.commons.csv.Constants.CRLF; import static org.apache.commons.csv.Constants.CRLF;
import static org.apache.commons.csv.Constants.DOUBLE_QUOTE; import static org.apache.commons.csv.Constants.DOUBLE_QUOTE;
import static org.apache.commons.csv.Constants.ESCAPE; import static org.apache.commons.csv.Constants.ESCAPE;
@ -38,30 +39,19 @@ public class CSVFormat implements Serializable {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
private final char delimiter; private final Character delimiter;
private final char encapsulator; private final Character encapsulator;
private final char commentStart; private final Character commentStart;
private final char escape; private final Character escape;
private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
private final boolean ignoreEmptyLines; private final boolean ignoreEmptyLines;
private final String lineSeparator; // for outputs private final String lineSeparator; // for outputs
private final String[] header; private final String[] header;
private final boolean isEscaping;
private final boolean isCommentingEnabled;
private final boolean isEncapsulating;
/**
* Constant char to be used for disabling comments, escapes and encapsulation. The value -2 is used because it
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two chars
* (using surrogates) and thus there should never be a collision with a real text char.
*/
static final char DISABLED = '\ufffe';
/** /**
* Starting format with no settings defined; used for creating other formats from scratch. * Starting format with no settings defined; used for creating other formats from scratch.
*/ */
static final CSVFormat PRISTINE = new CSVFormat(DISABLED, DISABLED, DISABLED, DISABLED, false, false, null, null); static final CSVFormat PRISTINE = new CSVFormat(null, null, null, null, false, false, null, null);
/** /**
* Standard comma separated format, as for {@link #RFC4180} but allowing blank lines. * Standard comma separated format, as for {@link #RFC4180} but allowing blank lines.
@ -73,8 +63,8 @@ public class CSVFormat implements Serializable {
* </ul> * </ul>
*/ */
public static final CSVFormat DEFAULT = public static final CSVFormat DEFAULT =
PRISTINE. PRISTINE
withDelimiter(COMMA) .withDelimiter(COMMA)
.withEncapsulator(DOUBLE_QUOTE) .withEncapsulator(DOUBLE_QUOTE)
.withIgnoreEmptyLines(true) .withIgnoreEmptyLines(true)
.withLineSeparator(CRLF); .withLineSeparator(CRLF);
@ -89,8 +79,8 @@ public class CSVFormat implements Serializable {
* </ul> * </ul>
*/ */
public static final CSVFormat RFC4180 = public static final CSVFormat RFC4180 =
PRISTINE. PRISTINE
withDelimiter(COMMA) .withDelimiter(COMMA)
.withEncapsulator(DOUBLE_QUOTE) .withEncapsulator(DOUBLE_QUOTE)
.withLineSeparator(CRLF); .withLineSeparator(CRLF);
@ -127,7 +117,7 @@ public class CSVFormat implements Serializable {
* @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">
* http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a> * http://dev.mysql.com/doc/refman/5.1/en/load-data.html</a>
*/ */
public static final CSVFormat MYSQL = public static final CSVFormat MYSQL =
PRISTINE PRISTINE
.withDelimiter(TAB) .withDelimiter(TAB)
.withEscape(ESCAPE) .withEscape(ESCAPE)
@ -153,7 +143,7 @@ public class CSVFormat implements Serializable {
* @param header * @param header
* the header * the header
*/ */
CSVFormat(final char delimiter, final char encapsulator, final char commentStart, final char escape, final boolean surroundingSpacesIgnored, CSVFormat(final Character delimiter, final Character encapsulator, final Character commentStart, final Character escape, final boolean surroundingSpacesIgnored,
final boolean emptyLinesIgnored, final String lineSeparator, final String[] header) { final boolean emptyLinesIgnored, final String lineSeparator, final String[] header) {
this.delimiter = delimiter; this.delimiter = delimiter;
this.encapsulator = encapsulator; this.encapsulator = encapsulator;
@ -163,9 +153,6 @@ public class CSVFormat implements Serializable {
this.ignoreEmptyLines = emptyLinesIgnored; this.ignoreEmptyLines = emptyLinesIgnored;
this.lineSeparator = lineSeparator; this.lineSeparator = lineSeparator;
this.header = header; this.header = header;
this.isEncapsulating = encapsulator != DISABLED;
this.isCommentingEnabled = commentStart != DISABLED;
this.isEscaping = escape != DISABLED;
} }
/** /**
@ -176,8 +163,8 @@ public class CSVFormat implements Serializable {
* *
* @return true if <code>c</code> is a line break character * @return true if <code>c</code> is a line break character
*/ */
private static boolean isLineBreak(final char c) { private static boolean isLineBreak(final Character c) {
return c == '\n' || c == '\r'; return c != null && (c == LF || c == CR);
} }
/** /**
@ -199,12 +186,12 @@ public class CSVFormat implements Serializable {
commentStart + "\")"); commentStart + "\")");
} }
if (encapsulator != DISABLED && encapsulator == commentStart) { if (encapsulator != null && encapsulator == commentStart) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
"The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")"); "The comment start character and the encapsulator cannot be the same (\"" + commentStart + "\")");
} }
if (escape != DISABLED && escape == commentStart) { if (escape != null && escape == commentStart) {
throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\"" + throw new IllegalArgumentException("The comment start and the escape character cannot be the same (\"" +
commentStart + "\")"); commentStart + "\")");
} }
@ -229,6 +216,19 @@ public class CSVFormat implements Serializable {
* thrown if the specified character is a line break * thrown if the specified character is a line break
*/ */
public CSVFormat withDelimiter(final char delimiter) { public CSVFormat withDelimiter(final char delimiter) {
return withDelimiter(Character.valueOf(delimiter));
}
/**
* Returns a copy of this format using the specified delimiter character.
*
* @param delimiter
* the delimiter character
* @return A copy of this format using the specified delimiter character
* @throws IllegalArgumentException
* thrown if the specified character is a line break
*/
public CSVFormat withDelimiter(final Character delimiter) {
if (isLineBreak(delimiter)) { if (isLineBreak(delimiter)) {
throw new IllegalArgumentException("The delimiter cannot be a line break"); throw new IllegalArgumentException("The delimiter cannot be a line break");
} }
@ -241,7 +241,7 @@ public class CSVFormat implements Serializable {
* *
* @return the encapsulator character * @return the encapsulator character
*/ */
public char getEncapsulator() { public Character getEncapsulator() {
return encapsulator; return encapsulator;
} }
@ -255,6 +255,19 @@ public class CSVFormat implements Serializable {
* thrown if the specified character is a line break * thrown if the specified character is a line break
*/ */
public CSVFormat withEncapsulator(final char encapsulator) { public CSVFormat withEncapsulator(final char encapsulator) {
return withEncapsulator(Character.valueOf(encapsulator));
}
/**
* Returns a copy of this format using the specified encapsulator character.
*
* @param encapsulator
* the encapsulator character
* @return A copy of this format using the specified encapsulator character
* @throws IllegalArgumentException
* thrown if the specified character is a line break
*/
public CSVFormat withEncapsulator(final Character encapsulator) {
if (isLineBreak(encapsulator)) { if (isLineBreak(encapsulator)) {
throw new IllegalArgumentException("The encapsulator cannot be a line break"); throw new IllegalArgumentException("The encapsulator cannot be a line break");
} }
@ -268,7 +281,7 @@ public class CSVFormat implements Serializable {
* @return {@code true} if an encapsulator is defined * @return {@code true} if an encapsulator is defined
*/ */
public boolean isEncapsulating() { public boolean isEncapsulating() {
return isEncapsulating; return encapsulator != null;
} }
/** /**
@ -276,7 +289,7 @@ public class CSVFormat implements Serializable {
* *
* @return the comment start marker. * @return the comment start marker.
*/ */
public char getCommentStart() { public Character getCommentStart() {
return commentStart; return commentStart;
} }
@ -292,6 +305,21 @@ public class CSVFormat implements Serializable {
* thrown if the specified character is a line break * thrown if the specified character is a line break
*/ */
public CSVFormat withCommentStart(final char commentStart) { public CSVFormat withCommentStart(final char commentStart) {
return withCommentStart(Character.valueOf(commentStart));
}
/**
* Returns a copy of this format using the specified character as the comment start marker.
*
* Note that the comment introducer character is only recognised at the start of a line.
*
* @param commentStart
* the comment start marker
* @return A copy of this format using the specified character as the comment start marker
* @throws IllegalArgumentException
* thrown if the specified character is a line break
*/
public CSVFormat withCommentStart(final Character commentStart) {
if (isLineBreak(commentStart)) { if (isLineBreak(commentStart)) {
throw new IllegalArgumentException("The comment start character cannot be a line break"); throw new IllegalArgumentException("The comment start character cannot be a line break");
} }
@ -307,7 +335,7 @@ public class CSVFormat implements Serializable {
* @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise * @return <tt>true</tt> if comments are supported, <tt>false</tt> otherwise
*/ */
public boolean isCommentingEnabled() { public boolean isCommentingEnabled() {
return isCommentingEnabled; return commentStart != null;
} }
/** /**
@ -315,7 +343,7 @@ public class CSVFormat implements Serializable {
* *
* @return the escape character * @return the escape character
*/ */
public char getEscape() { public Character getEscape() {
return escape; return escape;
} }
@ -329,6 +357,19 @@ public class CSVFormat implements Serializable {
* thrown if the specified character is a line break * thrown if the specified character is a line break
*/ */
public CSVFormat withEscape(final char escape) { public CSVFormat withEscape(final char escape) {
return withEscape(Character.valueOf(escape));
}
/**
* Returns a copy of this format using the specified escape character.
*
* @param escape
* the escape character
* @return A copy of this format using the specified escape character
* @throws IllegalArgumentException
* thrown if the specified character is a line break
*/
public CSVFormat withEscape(final Character escape) {
if (isLineBreak(escape)) { if (isLineBreak(escape)) {
throw new IllegalArgumentException("The escape character cannot be a line break"); throw new IllegalArgumentException("The escape character cannot be a line break");
} }
@ -342,7 +383,7 @@ public class CSVFormat implements Serializable {
* @return {@code true} if escapes are processed * @return {@code true} if escapes are processed
*/ */
public boolean isEscaping() { public boolean isEscaping() {
return isEscaping; return escape != null;
} }
/** /**

View File

@ -32,14 +32,10 @@ import java.io.IOException;
*/ */
abstract class Lexer { abstract class Lexer {
private final boolean isEncapsulating; private final Character delimiter;
private final boolean isEscaping; private final Character escape;
private final boolean isCommentEnabled; private final Character encapsulator;
private final Character commmentStart;
private final char delimiter;
private final char escape;
private final char encapsulator;
private final char commmentStart;
final boolean surroundingSpacesIgnored; final boolean surroundingSpacesIgnored;
final boolean emptyLinesIgnored; final boolean emptyLinesIgnored;
@ -52,9 +48,6 @@ abstract class Lexer {
Lexer(final CSVFormat format, final ExtendedBufferedReader in) { Lexer(final CSVFormat format, final ExtendedBufferedReader in) {
this.format = format; this.format = format;
this.in = in; this.in = in;
this.isEncapsulating = format.isEncapsulating();
this.isEscaping = format.isEscaping();
this.isCommentEnabled = format.isCommentingEnabled();
this.delimiter = format.getDelimiter(); this.delimiter = format.getDelimiter();
this.escape = format.getEscape(); this.escape = format.getEscape();
this.encapsulator = format.getEncapsulator(); this.encapsulator = format.getEncapsulator();
@ -144,14 +137,14 @@ abstract class Lexer {
} }
boolean isEscape(final int c) { boolean isEscape(final int c) {
return isEscaping && c == escape; return escape != null && c == escape;
} }
boolean isEncapsulator(final int c) { boolean isEncapsulator(final int c) {
return isEncapsulating && c == encapsulator; return encapsulator != null && c == encapsulator;
} }
boolean isCommentStart(final int c) { boolean isCommentStart(final int c) {
return isCommentEnabled && c == commmentStart; return commmentStart != null && c == commmentStart;
} }
} }

View File

@ -46,9 +46,9 @@ public class CSVFormatTest {
format.withIgnoreEmptyLines(false); format.withIgnoreEmptyLines(false);
assertEquals('!', format.getDelimiter()); assertEquals('!', format.getDelimiter());
assertEquals('!', format.getEncapsulator()); assertEquals('!', format.getEncapsulator().charValue());
assertEquals('!', format.getCommentStart()); assertEquals('!', format.getCommentStart().charValue());
assertEquals('!', format.getEscape()); assertEquals('!', format.getEscape().charValue());
assertEquals(CRLF, format.getLineSeparator()); assertEquals(CRLF, format.getLineSeparator());
assertTrue(format.getIgnoreSurroundingSpaces()); assertTrue(format.getIgnoreSurroundingSpaces());
@ -60,10 +60,10 @@ public class CSVFormatTest {
final CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, CRLF, null); final CSVFormat format = new CSVFormat('!', '!', '!', '!', true, true, CRLF, null);
assertEquals('?', format.withDelimiter('?').getDelimiter()); assertEquals('?', format.withDelimiter('?').getDelimiter());
assertEquals('?', format.withEncapsulator('?').getEncapsulator()); assertEquals('?', format.withEncapsulator('?').getEncapsulator().charValue());
assertEquals('?', format.withCommentStart('?').getCommentStart()); assertEquals('?', format.withCommentStart('?').getCommentStart().charValue());
assertEquals("?", format.withLineSeparator("?").getLineSeparator()); assertEquals("?", format.withLineSeparator("?").getLineSeparator());
assertEquals('?', format.withEscape('?').getEscape()); assertEquals('?', format.withEscape('?').getEscape().charValue());
assertFalse(format.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces()); assertFalse(format.withIgnoreSurroundingSpaces(false).getIgnoreSurroundingSpaces());
assertFalse(format.withIgnoreEmptyLines(false).getIgnoreEmptyLines()); assertFalse(format.withIgnoreEmptyLines(false).getIgnoreEmptyLines());
@ -131,7 +131,7 @@ public class CSVFormatTest {
// expected // expected
} }
format.withEncapsulator(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate(); format.withEncapsulator(null).withCommentStart(null).validate();
try { try {
format.withEscape('!').withCommentStart('!').validate(); format.withEscape('!').withCommentStart('!').validate();
@ -140,7 +140,7 @@ public class CSVFormatTest {
// expected // expected
} }
format.withEscape(CSVFormat.DISABLED).withCommentStart(CSVFormat.DISABLED).validate(); format.withEscape(null).withCommentStart(null).validate();
try { try {