1
0
mirror of https://github.com/apache/commons-csv.git synced 2025-03-02 23:09:17 +00:00

CSVStrategy is now immutable (SANDBOX-279)

git-svn-id: https://svn.apache.org/repos/asf/commons/sandbox/csv/trunk@1199827 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Emmanuel Bourg 2011-11-09 16:21:23 +00:00
parent fc4ccb426e
commit 42476f4b08
5 changed files with 126 additions and 136 deletions
src
main/java/org/apache/commons/csv
test/java/org/apache/commons/csv

@ -122,7 +122,7 @@ public class CSVParser {
* @param input a Reader containing "csv-formatted" input
*/
public CSVParser(Reader input) {
this(input, (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone());
this(input, CSVStrategy.DEFAULT_STRATEGY);
}
/**
@ -260,7 +260,7 @@ public class CSVParser {
c = in.readAgain();
// empty line detection: eol AND (last char was EOL or beginning)
while (strategy.getIgnoreEmptyLines() && eol
while (strategy.isEmptyLinesIgnored() && eol
&& (lastChar == '\n'
|| lastChar == '\r'
|| lastChar == ExtendedBufferedReader.UNDEFINED)
@ -286,7 +286,7 @@ public class CSVParser {
// important: make sure a new char gets consumed in each iteration
while (!tkn.isReady && tkn.type != TT_EOF) {
// ignore whitespaces at beginning of a token
while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) {
while (strategy.isLeadingSpacesIgnored() && isWhitespace(c) && !eol) {
wsBuf.append((char) c);
c = in.read();
eol = isEndOfLine(c);
@ -316,7 +316,7 @@ public class CSVParser {
} else {
// next token must be a simple token
// add removed blanks when not ignoring whitespace chars...
if (!strategy.getIgnoreLeadingWhitespaces()) {
if (!strategy.isLeadingSpacesIgnored()) {
tkn.content.append(wsBuf);
}
simpleTokenLexer(tkn, c);
@ -359,7 +359,7 @@ public class CSVParser {
tkn.type = TT_TOKEN;
tkn.isReady = true;
break;
} else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
} else if (c == '\\' && strategy.isUnicodeEscapesInterpreted() && in.lookAhead() == 'u') {
// interpret unicode escaped chars (like \u0070 -> p)
tkn.content.append((char) unicodeEscapeLexer(c));
} else if (c == strategy.getEscape()) {
@ -371,7 +371,7 @@ public class CSVParser {
c = in.read();
}
if (strategy.getIgnoreTrailingWhitespaces()) {
if (strategy.isTrailingSpacesIgnored()) {
tkn.content.trimTrailingWhitespace();
}
@ -400,7 +400,7 @@ public class CSVParser {
for (; ;) {
c = in.read();
if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
if (c == '\\' && strategy.isUnicodeEscapesInterpreted() && in.lookAhead() == 'u') {
tkn.content.append((char) unicodeEscapeLexer(c));
} else if (c == strategy.getEscape()) {
tkn.content.append((char) readEscape(c));

@ -58,7 +58,7 @@ public class CSVPrinter {
* Output a blank line
*/
public void println() throws IOException {
out.write(strategy.getPrinterNewline());
out.write(strategy.getLineSeparator());
newLine = true;
}

@ -26,17 +26,15 @@ import java.io.Serializable;
*/
public class CSVStrategy implements Cloneable, Serializable {
private char delimiter;
private char encapsulator;
private char commentStart;
private char escape;
private boolean ignoreLeadingWhitespaces;
private boolean ignoreTrailingWhitespaces;
private boolean interpretUnicodeEscapes;
private boolean ignoreEmptyLines;
// controls for output
private String printerNewline = "\n";
private char delimiter = ',';
private char encapsulator = '"';
private char commentStart = COMMENTS_DISABLED;
private char escape = ESCAPE_DISABLED;
private boolean leadingSpacesIgnored = true;
private boolean trailingSpacesIgnored = true;
private boolean unicodeEscapesInterpreted = false;
private boolean emptyLinesIgnored = true;
private String lineSeparator = "\n";
// -2 is used to signal disabled, because it won't be confused with
// an EOF signal (-1), and because \ufffe in UTF-16 would be
@ -46,11 +44,22 @@ public class CSVStrategy implements Cloneable, Serializable {
public static final char ESCAPE_DISABLED = (char) -2;
public static final char ENCAPSULATOR_DISABLED = (char) -2;
/** Standard comma separated format. */
public static final CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);
/** Excel file format (using a comma as the value delimiter). */
public static final CSVStrategy EXCEL_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, false, false, false, false);
/** Tabulation delimited format. */
public static final CSVStrategy TDF_STRATEGY = new CSVStrategy('\t', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);
/**
 * Creates a CSVStrategy with the default parameters.
 * <p>
 * No field is assigned here, so every setting keeps the value given by its
 * field initializer: ',' as delimiter, '"' as encapsulator, comments and
 * escaping disabled, leading and trailing spaces ignored, unicode escapes
 * not interpreted, empty lines ignored and "\n" as line separator.
 */
public CSVStrategy() {
}
/**
 * Creates a strategy from the three given characters. All remaining
 * settings are passed to the full constructor with fixed values: escaping
 * disabled, leading and trailing spaces ignored, unicode escapes not
 * interpreted and empty lines ignored.
 *
 * @param delimiter    a char used for value separation
 * @param encapsulator a char used as value encapsulation marker
 * @param commentStart a char used for comment identification
 */
public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
    this(
        delimiter,
        encapsulator,
        commentStart,
        ESCAPE_DISABLED, // escape
        true,            // leadingSpacesIgnored
        true,            // trailingSpacesIgnored
        false,           // unicodeEscapesInterpreted
        true);           // emptyLinesIgnored
}
@ -62,103 +71,129 @@ public class CSVStrategy implements Cloneable, Serializable {
* @param encapsulator a char used as value encapsulation marker
* @param commentStart a char used for comment identification
* @param escape a char used to escape special characters in values
* @param ignoreLeadingWhitespaces TRUE when leading whitespaces should be ignored
* @param ignoreTrailingWhitespaces TRUE when trailing whitespaces should be ignored
* @param interpretUnicodeEscapes TRUE when unicode escapes should be interpreted
* @param ignoreEmptyLines TRUE when the parser should skip emtpy lines
* @param leadingSpacesIgnored TRUE when leading whitespaces should be ignored
* @param trailingSpacesIgnored TRUE when trailing whitespaces should be ignored
* @param unicodeEscapesInterpreted TRUE when unicode escapes should be interpreted
* @param emptyLinesIgnored TRUE when the parser should skip empty lines
*/
public CSVStrategy(
char delimiter,
char encapsulator,
char commentStart,
char escape,
boolean ignoreLeadingWhitespaces,
boolean ignoreTrailingWhitespaces,
boolean interpretUnicodeEscapes,
boolean ignoreEmptyLines) {
boolean leadingSpacesIgnored,
boolean trailingSpacesIgnored,
boolean unicodeEscapesInterpreted,
boolean emptyLinesIgnored) {
this.delimiter = delimiter;
this.encapsulator = encapsulator;
this.commentStart = commentStart;
this.escape = escape;
this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces;
this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces;
this.interpretUnicodeEscapes = interpretUnicodeEscapes;
this.ignoreEmptyLines = ignoreEmptyLines;
}
public void setDelimiter(char delimiter) {
this.delimiter = delimiter;
this.leadingSpacesIgnored = leadingSpacesIgnored;
this.trailingSpacesIgnored = trailingSpacesIgnored;
this.unicodeEscapesInterpreted = unicodeEscapesInterpreted;
this.emptyLinesIgnored = emptyLinesIgnored;
}
public char getDelimiter() {
return this.delimiter;
return delimiter;
}
public void setEncapsulator(char encapsulator) {
this.encapsulator = encapsulator;
/**
 * Returns a copy of this strategy that uses the specified value delimiter.
 * This strategy itself is left unchanged.
 *
 * @param delimiter the char used for value separation
 * @return a new strategy equal to this one except for the delimiter
 */
public CSVStrategy withDelimiter(char delimiter) {
    CSVStrategy copy = (CSVStrategy) clone();
    // Assign to the copy, not to this instance: writing this.delimiter would
    // mutate the receiver (including shared constants such as DEFAULT_STRATEGY)
    // and hand back a copy that still carries the old delimiter.
    copy.delimiter = delimiter;
    return copy;
}
public char getEncapsulator() {
return this.encapsulator;
return encapsulator;
}
public void setCommentStart(char commentStart) {
this.commentStart = commentStart;
/**
 * Returns a copy of this strategy that uses the specified encapsulator.
 * This strategy itself is left unchanged.
 *
 * @param encapsulator the char used as value encapsulation marker
 * @return a new strategy equal to this one except for the encapsulator
 */
public CSVStrategy withEncapsulator(char encapsulator) {
    CSVStrategy copy = (CSVStrategy) clone();
    copy.encapsulator = encapsulator;
    return copy;
}
public char getCommentStart() {
return this.commentStart;
return commentStart;
}
/**
 * Returns a copy of this strategy that uses the specified comment start
 * character. This strategy itself is left unchanged.
 *
 * @param commentStart the char used for comment identification
 * @return a new strategy equal to this one except for the comment start
 */
public CSVStrategy withCommentStart(char commentStart) {
    CSVStrategy copy = (CSVStrategy) clone();
    copy.commentStart = commentStart;
    return copy;
}
/**
 * Tells whether comment handling is turned off for this strategy.
 *
 * @return true if the comment start character equals COMMENTS_DISABLED
 */
public boolean isCommentingDisabled() {
    return commentStart == COMMENTS_DISABLED;
}
public void setEscape(char escape) {
this.escape = escape;
}
public char getEscape() {
return this.escape;
return escape;
}
public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) {
this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces;
/**
 * Returns a copy of this strategy that uses the specified escape character.
 * This strategy itself is left unchanged.
 *
 * @param escape the char used to escape special characters in values
 * @return a new strategy equal to this one except for the escape character
 */
public CSVStrategy withEscape(char escape) {
    CSVStrategy copy = (CSVStrategy) clone();
    copy.escape = escape;
    return copy;
}
public boolean getIgnoreLeadingWhitespaces() {
return this.ignoreLeadingWhitespaces;
public boolean isLeadingSpacesIgnored() {
return leadingSpacesIgnored;
}
public void setIgnoreTrailingWhitespaces(boolean ignoreTrailingWhitespaces) {
this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces;
/**
 * Returns a copy of this strategy with the given leading-whitespace
 * setting. This strategy itself is left unchanged.
 *
 * @param leadingSpacesIgnored true when leading whitespaces should be ignored
 * @return a new strategy equal to this one except for this setting
 */
public CSVStrategy withLeadingSpacesIgnored(boolean leadingSpacesIgnored) {
    CSVStrategy copy = (CSVStrategy) clone();
    copy.leadingSpacesIgnored = leadingSpacesIgnored;
    return copy;
}
public boolean getIgnoreTrailingWhitespaces() {
return this.ignoreTrailingWhitespaces;
public boolean isTrailingSpacesIgnored() {
return trailingSpacesIgnored;
}
public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) {
this.interpretUnicodeEscapes = interpretUnicodeEscapes;
/**
 * Returns a copy of this strategy with the given trailing-whitespace
 * setting. This strategy itself is left unchanged.
 *
 * @param trailingSpacesIgnored true when trailing whitespaces should be ignored
 * @return a new strategy equal to this one except for this setting
 */
public CSVStrategy withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
    CSVStrategy copy = (CSVStrategy) clone();
    copy.trailingSpacesIgnored = trailingSpacesIgnored;
    return copy;
}
public boolean getUnicodeEscapeInterpretation() {
return this.interpretUnicodeEscapes;
public boolean isUnicodeEscapesInterpreted() {
return unicodeEscapesInterpreted;
}
public boolean getIgnoreEmptyLines() {
return this.ignoreEmptyLines;
/**
 * Returns a copy of this strategy with the given unicode-escape setting.
 * This strategy itself is left unchanged.
 *
 * @param unicodeEscapesInterpreted true when unicode escapes should be interpreted
 * @return a new strategy equal to this one except for this setting
 */
public CSVStrategy withUnicodeEscapesInterpreted(boolean unicodeEscapesInterpreted) {
    CSVStrategy copy = (CSVStrategy) clone();
    copy.unicodeEscapesInterpreted = unicodeEscapesInterpreted;
    return copy;
}
public String getPrinterNewline() {
return this.printerNewline;
public boolean isEmptyLinesIgnored() {
return emptyLinesIgnored;
}
public Object clone() {
/**
 * Returns a copy of this strategy with the given empty-line setting.
 * This strategy itself is left unchanged.
 *
 * @param emptyLinesIgnored true when the parser should skip empty lines
 * @return a new strategy equal to this one except for this setting
 */
public CSVStrategy withEmptyLinesIgnored(boolean emptyLinesIgnored) {
    CSVStrategy copy = (CSVStrategy) clone();
    copy.emptyLinesIgnored = emptyLinesIgnored;
    return copy;
}
public String getLineSeparator() {
return lineSeparator;
}
/**
 * Returns a copy of this strategy that uses the specified line separator.
 * This strategy itself is left unchanged.
 *
 * @param lineSeparator the string exposed through getLineSeparator()
 * @return a new strategy equal to this one except for the line separator
 */
public CSVStrategy withLineSeparator(String lineSeparator) {
    CSVStrategy copy = (CSVStrategy) clone();
    copy.lineSeparator = lineSeparator;
    return copy;
}
protected Object clone() {
try {
return super.clone();
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e); // impossible
throw (Error) new InternalError().initCause(e);
}
}
}

@ -94,10 +94,8 @@ public class CSVParserTest extends TestCase {
*
*/
String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
// strategy.setIgnoreEmptyLines(false);
strategy.setCommentStart('#');
CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY.withCommentStart('#');
TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
@ -123,8 +121,7 @@ public class CSVParserTest extends TestCase {
* \,,
*/
String code = "a,\\,,b\n\\,,";
CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
strategy.setCommentStart('#');
CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY.withCommentStart('#');
TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
@ -520,8 +517,7 @@ public class CSVParserTest extends TestCase {
public void testUnicodeEscape() throws IOException {
String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
CSVParser parser = new CSVParser(new StringReader(code));
parser.getStrategy().setUnicodeEscapeInterpretation(true);
CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.DEFAULT_STRATEGY.withUnicodeEscapesInterpreted(true));
String[] data = parser.getLine();
assertEquals(2, data.length);
assertEquals("abc", data[0]);

@ -14,76 +14,35 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import junit.framework.TestCase;
/**
* CSVStrategyTest
*
* The tests are organized in three different sections:
* the 'setter/getter' section, the lexer section and finally the strategy
* section. In case a test fails, you should follow a top-down approach for
* fixing a potential bug (it's likely that the strategy itself fails if the lexer
* has problems...).
*/
public class CSVStrategyTest extends TestCase {
// ======================================================
// getters / setters
// ======================================================
public void testGetSetCommentStart() {
CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
strategy.setCommentStart('#');
assertEquals(strategy.getCommentStart(), '#');
strategy.setCommentStart('!');
assertEquals(strategy.getCommentStart(), '!');
}
public void testImmutalibity() {
CSVStrategy strategy1 = new CSVStrategy('!', '!', '!', '!', true, true, true, true);
CSVStrategy strategy2 = strategy1.withDelimiter('?')
.withEncapsulator('?')
.withCommentStart('?')
.withLineSeparator("?")
.withEscape('?')
.withLeadingSpacesIgnored(false)
.withTrailingSpacesIgnored(false)
.withEmptyLinesIgnored(false)
.withUnicodeEscapesInterpreted(false);
public void testGetSetEncapsulator() {
CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
strategy.setEncapsulator('"');
assertEquals(strategy.getEncapsulator(), '"');
strategy.setEncapsulator('\'');
assertEquals(strategy.getEncapsulator(), '\'');
}
public void testGetSetDelimiter() {
CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
strategy.setDelimiter(';');
assertEquals(strategy.getDelimiter(), ';');
strategy.setDelimiter(',');
assertEquals(strategy.getDelimiter(), ',');
strategy.setDelimiter('\t');
assertEquals(strategy.getDelimiter(), '\t');
}
public void testSetCSVStrategy() {
CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
// default settings
assertEquals(strategy.getDelimiter(), ',');
assertEquals(strategy.getEncapsulator(), '"');
assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
assertEquals(false, strategy.getUnicodeEscapeInterpretation());
assertEquals(true, strategy.getIgnoreEmptyLines());
// explicit csv settings
assertEquals(strategy.getDelimiter(), ',');
assertEquals(strategy.getEncapsulator(), '"');
assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
assertEquals(false, strategy.getUnicodeEscapeInterpretation());
assertEquals(true, strategy.getIgnoreEmptyLines());
}
public void testSetExcelStrategy() {
CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY;
assertEquals(strategy.getDelimiter(), ',');
assertEquals(strategy.getEncapsulator(), '"');
assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
assertEquals(false, strategy.getIgnoreLeadingWhitespaces());
assertEquals(false, strategy.getUnicodeEscapeInterpretation());
assertEquals(false, strategy.getIgnoreEmptyLines());
assertNotSame(strategy1.getDelimiter(), strategy2.getDelimiter());
assertNotSame(strategy1.getEncapsulator(), strategy2.getEncapsulator());
assertNotSame(strategy1.getCommentStart(), strategy2.getCommentStart());
assertNotSame(strategy1.getEscape(), strategy2.getEscape());
assertNotSame(strategy1.getLineSeparator(), strategy2.getLineSeparator());
assertNotSame(strategy1.isTrailingSpacesIgnored(), strategy2.isTrailingSpacesIgnored());
assertNotSame(strategy1.isLeadingSpacesIgnored(), strategy2.isLeadingSpacesIgnored());
assertNotSame(strategy1.isEmptyLinesIgnored(), strategy2.isEmptyLinesIgnored());
assertNotSame(strategy1.isUnicodeEscapesInterpreted(), strategy2.isUnicodeEscapesInterpreted());
}
}