mirror of
https://github.com/apache/commons-csv.git
synced 2025-03-02 23:09:17 +00:00
CSVStrategy is now immutable (SANDBOX-279)
git-svn-id: https://svn.apache.org/repos/asf/commons/sandbox/csv/trunk@1199827 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fc4ccb426e
commit
42476f4b08
src
main/java/org/apache/commons/csv
test/java/org/apache/commons/csv
@ -122,7 +122,7 @@ public class CSVParser {
|
||||
* @param input a Reader containing "csv-formatted" input
|
||||
*/
|
||||
public CSVParser(Reader input) {
|
||||
this(input, (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone());
|
||||
this(input, CSVStrategy.DEFAULT_STRATEGY);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -260,7 +260,7 @@ public class CSVParser {
|
||||
c = in.readAgain();
|
||||
|
||||
// empty line detection: eol AND (last char was EOL or beginning)
|
||||
while (strategy.getIgnoreEmptyLines() && eol
|
||||
while (strategy.isEmptyLinesIgnored() && eol
|
||||
&& (lastChar == '\n'
|
||||
|| lastChar == '\r'
|
||||
|| lastChar == ExtendedBufferedReader.UNDEFINED)
|
||||
@ -286,7 +286,7 @@ public class CSVParser {
|
||||
// important: make sure a new char gets consumed in each iteration
|
||||
while (!tkn.isReady && tkn.type != TT_EOF) {
|
||||
// ignore whitespaces at beginning of a token
|
||||
while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) {
|
||||
while (strategy.isLeadingSpacesIgnored() && isWhitespace(c) && !eol) {
|
||||
wsBuf.append((char) c);
|
||||
c = in.read();
|
||||
eol = isEndOfLine(c);
|
||||
@ -316,7 +316,7 @@ public class CSVParser {
|
||||
} else {
|
||||
// next token must be a simple token
|
||||
// add removed blanks when not ignoring whitespace chars...
|
||||
if (!strategy.getIgnoreLeadingWhitespaces()) {
|
||||
if (!strategy.isLeadingSpacesIgnored()) {
|
||||
tkn.content.append(wsBuf);
|
||||
}
|
||||
simpleTokenLexer(tkn, c);
|
||||
@ -359,7 +359,7 @@ public class CSVParser {
|
||||
tkn.type = TT_TOKEN;
|
||||
tkn.isReady = true;
|
||||
break;
|
||||
} else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
|
||||
} else if (c == '\\' && strategy.isUnicodeEscapesInterpreted() && in.lookAhead() == 'u') {
|
||||
// interpret unicode escaped chars (like \u0070 -> p)
|
||||
tkn.content.append((char) unicodeEscapeLexer(c));
|
||||
} else if (c == strategy.getEscape()) {
|
||||
@ -371,7 +371,7 @@ public class CSVParser {
|
||||
c = in.read();
|
||||
}
|
||||
|
||||
if (strategy.getIgnoreTrailingWhitespaces()) {
|
||||
if (strategy.isTrailingSpacesIgnored()) {
|
||||
tkn.content.trimTrailingWhitespace();
|
||||
}
|
||||
|
||||
@ -400,7 +400,7 @@ public class CSVParser {
|
||||
for (; ;) {
|
||||
c = in.read();
|
||||
|
||||
if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
|
||||
if (c == '\\' && strategy.isUnicodeEscapesInterpreted() && in.lookAhead() == 'u') {
|
||||
tkn.content.append((char) unicodeEscapeLexer(c));
|
||||
} else if (c == strategy.getEscape()) {
|
||||
tkn.content.append((char) readEscape(c));
|
||||
|
@ -58,7 +58,7 @@ public class CSVPrinter {
|
||||
* Output a blank line
|
||||
*/
|
||||
public void println() throws IOException {
|
||||
out.write(strategy.getPrinterNewline());
|
||||
out.write(strategy.getLineSeparator());
|
||||
newLine = true;
|
||||
}
|
||||
|
||||
|
@ -26,17 +26,15 @@ import java.io.Serializable;
|
||||
*/
|
||||
public class CSVStrategy implements Cloneable, Serializable {
|
||||
|
||||
private char delimiter;
|
||||
private char encapsulator;
|
||||
private char commentStart;
|
||||
private char escape;
|
||||
private boolean ignoreLeadingWhitespaces;
|
||||
private boolean ignoreTrailingWhitespaces;
|
||||
private boolean interpretUnicodeEscapes;
|
||||
private boolean ignoreEmptyLines;
|
||||
|
||||
// controls for output
|
||||
private String printerNewline = "\n";
|
||||
private char delimiter = ',';
|
||||
private char encapsulator = '"';
|
||||
private char commentStart = COMMENTS_DISABLED;
|
||||
private char escape = ESCAPE_DISABLED;
|
||||
private boolean leadingSpacesIgnored = true;
|
||||
private boolean trailingSpacesIgnored = true;
|
||||
private boolean unicodeEscapesInterpreted = false;
|
||||
private boolean emptyLinesIgnored = true;
|
||||
private String lineSeparator = "\n";
|
||||
|
||||
// -2 is used to signal disabled, because it won't be confused with
|
||||
// an EOF signal (-1), and because \ufffe in UTF-16 would be
|
||||
@ -46,11 +44,22 @@ public class CSVStrategy implements Cloneable, Serializable {
|
||||
public static final char ESCAPE_DISABLED = (char) -2;
|
||||
public static final char ENCAPSULATOR_DISABLED = (char) -2;
|
||||
|
||||
/** Standard comma separated format. */
|
||||
public static final CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);
|
||||
|
||||
/** Excel file format (using a comma as the value delimiter). */
|
||||
public static final CSVStrategy EXCEL_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, false, false, false, false);
|
||||
|
||||
/** Tabulation delimited format. */
|
||||
public static final CSVStrategy TDF_STRATEGY = new CSVStrategy('\t', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true);
|
||||
|
||||
|
||||
/**
|
||||
* Creates a CSVStrategy with the default parameters.
|
||||
*/
|
||||
public CSVStrategy() {
|
||||
}
|
||||
|
||||
public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
|
||||
this(delimiter, encapsulator, commentStart, ESCAPE_DISABLED, true, true, false, true);
|
||||
}
|
||||
@ -62,103 +71,129 @@ public class CSVStrategy implements Cloneable, Serializable {
|
||||
* @param encapsulator a char used as value encapsulation marker
|
||||
* @param commentStart a char used for comment identification
|
||||
* @param escape a char used to escape special characters in values
|
||||
* @param ignoreLeadingWhitespaces TRUE when leading whitespaces should be ignored
|
||||
* @param ignoreTrailingWhitespaces TRUE when trailing whitespaces should be ignored
|
||||
* @param interpretUnicodeEscapes TRUE when unicode escapes should be interpreted
|
||||
* @param ignoreEmptyLines TRUE when the parser should skip empty lines
|
||||
* @param leadingSpacesIgnored TRUE when leading whitespaces should be ignored
|
||||
* @param trailingSpacesIgnored TRUE when trailing whitespaces should be ignored
|
||||
* @param unicodeEscapesInterpreted TRUE when unicode escapes should be interpreted
|
||||
* @param emptyLinesIgnored TRUE when the parser should skip empty lines
|
||||
*/
|
||||
public CSVStrategy(
|
||||
char delimiter,
|
||||
char encapsulator,
|
||||
char commentStart,
|
||||
char escape,
|
||||
boolean ignoreLeadingWhitespaces,
|
||||
boolean ignoreTrailingWhitespaces,
|
||||
boolean interpretUnicodeEscapes,
|
||||
boolean ignoreEmptyLines) {
|
||||
boolean leadingSpacesIgnored,
|
||||
boolean trailingSpacesIgnored,
|
||||
boolean unicodeEscapesInterpreted,
|
||||
boolean emptyLinesIgnored) {
|
||||
this.delimiter = delimiter;
|
||||
this.encapsulator = encapsulator;
|
||||
this.commentStart = commentStart;
|
||||
this.escape = escape;
|
||||
this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces;
|
||||
this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces;
|
||||
this.interpretUnicodeEscapes = interpretUnicodeEscapes;
|
||||
this.ignoreEmptyLines = ignoreEmptyLines;
|
||||
}
|
||||
|
||||
public void setDelimiter(char delimiter) {
|
||||
this.delimiter = delimiter;
|
||||
this.leadingSpacesIgnored = leadingSpacesIgnored;
|
||||
this.trailingSpacesIgnored = trailingSpacesIgnored;
|
||||
this.unicodeEscapesInterpreted = unicodeEscapesInterpreted;
|
||||
this.emptyLinesIgnored = emptyLinesIgnored;
|
||||
}
|
||||
|
||||
public char getDelimiter() {
|
||||
return this.delimiter;
|
||||
return delimiter;
|
||||
}
|
||||
|
||||
public void setEncapsulator(char encapsulator) {
|
||||
this.encapsulator = encapsulator;
|
||||
public CSVStrategy withDelimiter(char delimiter) {
|
||||
CSVStrategy strategy = (CSVStrategy) clone();
|
||||
this.delimiter = delimiter;
|
||||
return strategy;
|
||||
}
|
||||
|
||||
public char getEncapsulator() {
|
||||
return this.encapsulator;
|
||||
return encapsulator;
|
||||
}
|
||||
|
||||
public void setCommentStart(char commentStart) {
|
||||
this.commentStart = commentStart;
|
||||
public CSVStrategy withEncapsulator(char encapsulator) {
|
||||
CSVStrategy strategy = (CSVStrategy) clone();
|
||||
strategy.encapsulator = encapsulator;
|
||||
return strategy;
|
||||
}
|
||||
|
||||
public char getCommentStart() {
|
||||
return this.commentStart;
|
||||
return commentStart;
|
||||
}
|
||||
|
||||
public CSVStrategy withCommentStart(char commentStart) {
|
||||
CSVStrategy strategy = (CSVStrategy) clone();
|
||||
strategy.commentStart = commentStart;
|
||||
return strategy;
|
||||
}
|
||||
|
||||
public boolean isCommentingDisabled() {
|
||||
return this.commentStart == COMMENTS_DISABLED;
|
||||
}
|
||||
|
||||
public void setEscape(char escape) {
|
||||
this.escape = escape;
|
||||
}
|
||||
|
||||
public char getEscape() {
|
||||
return this.escape;
|
||||
return escape;
|
||||
}
|
||||
|
||||
public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) {
|
||||
this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces;
|
||||
public CSVStrategy withEscape(char escape) {
|
||||
CSVStrategy strategy = (CSVStrategy) clone();
|
||||
strategy.escape = escape;
|
||||
return strategy;
|
||||
}
|
||||
|
||||
public boolean getIgnoreLeadingWhitespaces() {
|
||||
return this.ignoreLeadingWhitespaces;
|
||||
public boolean isLeadingSpacesIgnored() {
|
||||
return leadingSpacesIgnored;
|
||||
}
|
||||
|
||||
public void setIgnoreTrailingWhitespaces(boolean ignoreTrailingWhitespaces) {
|
||||
this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces;
|
||||
public CSVStrategy withLeadingSpacesIgnored(boolean leadingSpacesIgnored) {
|
||||
CSVStrategy strategy = (CSVStrategy) clone();
|
||||
strategy.leadingSpacesIgnored = leadingSpacesIgnored;
|
||||
return strategy;
|
||||
}
|
||||
|
||||
public boolean getIgnoreTrailingWhitespaces() {
|
||||
return this.ignoreTrailingWhitespaces;
|
||||
public boolean isTrailingSpacesIgnored() {
|
||||
return trailingSpacesIgnored;
|
||||
}
|
||||
|
||||
public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) {
|
||||
this.interpretUnicodeEscapes = interpretUnicodeEscapes;
|
||||
public CSVStrategy withTrailingSpacesIgnored(boolean trailingSpacesIgnored) {
|
||||
CSVStrategy strategy = (CSVStrategy) clone();
|
||||
strategy.trailingSpacesIgnored = trailingSpacesIgnored;
|
||||
return strategy;
|
||||
}
|
||||
|
||||
public boolean getUnicodeEscapeInterpretation() {
|
||||
return this.interpretUnicodeEscapes;
|
||||
public boolean isUnicodeEscapesInterpreted() {
|
||||
return unicodeEscapesInterpreted;
|
||||
}
|
||||
|
||||
public boolean getIgnoreEmptyLines() {
|
||||
return this.ignoreEmptyLines;
|
||||
public CSVStrategy withUnicodeEscapesInterpreted(boolean unicodeEscapesInterpreted) {
|
||||
CSVStrategy strategy = (CSVStrategy) clone();
|
||||
strategy.unicodeEscapesInterpreted = unicodeEscapesInterpreted;
|
||||
return strategy;
|
||||
}
|
||||
|
||||
public String getPrinterNewline() {
|
||||
return this.printerNewline;
|
||||
public boolean isEmptyLinesIgnored() {
|
||||
return emptyLinesIgnored;
|
||||
}
|
||||
|
||||
public Object clone() {
|
||||
public CSVStrategy withEmptyLinesIgnored(boolean emptyLinesIgnored) {
|
||||
CSVStrategy strategy = (CSVStrategy) clone();
|
||||
strategy.emptyLinesIgnored = emptyLinesIgnored;
|
||||
return strategy;
|
||||
}
|
||||
|
||||
public String getLineSeparator() {
|
||||
return lineSeparator;
|
||||
}
|
||||
|
||||
public CSVStrategy withLineSeparator(String lineSeparator) {
|
||||
CSVStrategy strategy = (CSVStrategy) clone();
|
||||
strategy.lineSeparator = lineSeparator;
|
||||
return strategy;
|
||||
}
|
||||
|
||||
protected Object clone() {
|
||||
try {
|
||||
return super.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new RuntimeException(e); // impossible
|
||||
throw (Error) new InternalError().initCause(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -94,10 +94,8 @@ public class CSVParserTest extends TestCase {
|
||||
*
|
||||
*/
|
||||
String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
|
||||
CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
|
||||
// strategy.setIgnoreEmptyLines(false);
|
||||
strategy.setCommentStart('#');
|
||||
|
||||
CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY.withCommentStart('#');
|
||||
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
|
||||
|
||||
|
||||
@ -123,8 +121,7 @@ public class CSVParserTest extends TestCase {
|
||||
* \,,
|
||||
*/
|
||||
String code = "a,\\,,b\n\\,,";
|
||||
CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
|
||||
strategy.setCommentStart('#');
|
||||
CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY.withCommentStart('#');
|
||||
TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
|
||||
|
||||
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
|
||||
@ -520,8 +517,7 @@ public class CSVParserTest extends TestCase {
|
||||
|
||||
public void testUnicodeEscape() throws IOException {
|
||||
String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
|
||||
CSVParser parser = new CSVParser(new StringReader(code));
|
||||
parser.getStrategy().setUnicodeEscapeInterpretation(true);
|
||||
CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.DEFAULT_STRATEGY.withUnicodeEscapesInterpreted(true));
|
||||
String[] data = parser.getLine();
|
||||
assertEquals(2, data.length);
|
||||
assertEquals("abc", data[0]);
|
||||
|
@ -14,76 +14,35 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.csv;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* CSVStrategyTest
|
||||
*
|
||||
* The tests are organized in three different sections:
|
||||
* The 'setter/getter' section, the lexer section and finally the strategy
|
||||
* section. In case a test fails, you should follow a top-down approach for
|
||||
* fixing a potential bug (it's likely that the strategy itself fails if the lexer
|
||||
* has problems...).
|
||||
*/
|
||||
public class CSVStrategyTest extends TestCase {
|
||||
|
||||
// ======================================================
|
||||
// getters / setters
|
||||
// ======================================================
|
||||
public void testGetSetCommentStart() {
|
||||
CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
|
||||
strategy.setCommentStart('#');
|
||||
assertEquals(strategy.getCommentStart(), '#');
|
||||
strategy.setCommentStart('!');
|
||||
assertEquals(strategy.getCommentStart(), '!');
|
||||
}
|
||||
public void testImmutalibity() {
|
||||
CSVStrategy strategy1 = new CSVStrategy('!', '!', '!', '!', true, true, true, true);
|
||||
CSVStrategy strategy2 = strategy1.withDelimiter('?')
|
||||
.withEncapsulator('?')
|
||||
.withCommentStart('?')
|
||||
.withLineSeparator("?")
|
||||
.withEscape('?')
|
||||
.withLeadingSpacesIgnored(false)
|
||||
.withTrailingSpacesIgnored(false)
|
||||
.withEmptyLinesIgnored(false)
|
||||
.withUnicodeEscapesInterpreted(false);
|
||||
|
||||
public void testGetSetEncapsulator() {
|
||||
CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
|
||||
strategy.setEncapsulator('"');
|
||||
assertEquals(strategy.getEncapsulator(), '"');
|
||||
strategy.setEncapsulator('\'');
|
||||
assertEquals(strategy.getEncapsulator(), '\'');
|
||||
}
|
||||
|
||||
public void testGetSetDelimiter() {
|
||||
CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone();
|
||||
strategy.setDelimiter(';');
|
||||
assertEquals(strategy.getDelimiter(), ';');
|
||||
strategy.setDelimiter(',');
|
||||
assertEquals(strategy.getDelimiter(), ',');
|
||||
strategy.setDelimiter('\t');
|
||||
assertEquals(strategy.getDelimiter(), '\t');
|
||||
}
|
||||
|
||||
public void testSetCSVStrategy() {
|
||||
CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
|
||||
// default settings
|
||||
assertEquals(strategy.getDelimiter(), ',');
|
||||
assertEquals(strategy.getEncapsulator(), '"');
|
||||
assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
|
||||
assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
|
||||
assertEquals(false, strategy.getUnicodeEscapeInterpretation());
|
||||
assertEquals(true, strategy.getIgnoreEmptyLines());
|
||||
// explicit csv settings
|
||||
assertEquals(strategy.getDelimiter(), ',');
|
||||
assertEquals(strategy.getEncapsulator(), '"');
|
||||
assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
|
||||
assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
|
||||
assertEquals(false, strategy.getUnicodeEscapeInterpretation());
|
||||
assertEquals(true, strategy.getIgnoreEmptyLines());
|
||||
}
|
||||
|
||||
public void testSetExcelStrategy() {
|
||||
CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY;
|
||||
assertEquals(strategy.getDelimiter(), ',');
|
||||
assertEquals(strategy.getEncapsulator(), '"');
|
||||
assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);
|
||||
assertEquals(false, strategy.getIgnoreLeadingWhitespaces());
|
||||
assertEquals(false, strategy.getUnicodeEscapeInterpretation());
|
||||
assertEquals(false, strategy.getIgnoreEmptyLines());
|
||||
assertNotSame(strategy1.getDelimiter(), strategy2.getDelimiter());
|
||||
assertNotSame(strategy1.getEncapsulator(), strategy2.getEncapsulator());
|
||||
assertNotSame(strategy1.getCommentStart(), strategy2.getCommentStart());
|
||||
assertNotSame(strategy1.getEscape(), strategy2.getEscape());
|
||||
assertNotSame(strategy1.getLineSeparator(), strategy2.getLineSeparator());
|
||||
|
||||
assertNotSame(strategy1.isTrailingSpacesIgnored(), strategy2.isTrailingSpacesIgnored());
|
||||
assertNotSame(strategy1.isLeadingSpacesIgnored(), strategy2.isLeadingSpacesIgnored());
|
||||
assertNotSame(strategy1.isEmptyLinesIgnored(), strategy2.isEmptyLinesIgnored());
|
||||
assertNotSame(strategy1.isUnicodeEscapesInterpreted(), strategy2.isUnicodeEscapesInterpreted());
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user