diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 6af298c1..89f6d040 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -122,7 +122,7 @@ public class CSVParser { * @param input a Reader containing "csv-formatted" input */ public CSVParser(Reader input) { - this(input, (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone()); + this(input, CSVStrategy.DEFAULT_STRATEGY); } /** @@ -260,7 +260,7 @@ public class CSVParser { c = in.readAgain(); // empty line detection: eol AND (last char was EOL or beginning) - while (strategy.getIgnoreEmptyLines() && eol + while (strategy.isEmptyLinesIgnored() && eol && (lastChar == '\n' || lastChar == '\r' || lastChar == ExtendedBufferedReader.UNDEFINED) @@ -286,7 +286,7 @@ public class CSVParser { // important: make sure a new char gets consumed in each iteration while (!tkn.isReady && tkn.type != TT_EOF) { // ignore whitespaces at beginning of a token - while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) { + while (strategy.isLeadingSpacesIgnored() && isWhitespace(c) && !eol) { wsBuf.append((char) c); c = in.read(); eol = isEndOfLine(c); @@ -316,7 +316,7 @@ public class CSVParser { } else { // next token must be a simple token // add removed blanks when not ignoring whitespace chars... 
- if (!strategy.getIgnoreLeadingWhitespaces()) { + if (!strategy.isLeadingSpacesIgnored()) { tkn.content.append(wsBuf); } simpleTokenLexer(tkn, c); @@ -359,7 +359,7 @@ public class CSVParser { tkn.type = TT_TOKEN; tkn.isReady = true; break; - } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') { + } else if (c == '\\' && strategy.isUnicodeEscapesInterpreted() && in.lookAhead() == 'u') { // interpret unicode escaped chars (like \u0070 -> p) tkn.content.append((char) unicodeEscapeLexer(c)); } else if (c == strategy.getEscape()) { @@ -371,7 +371,7 @@ public class CSVParser { c = in.read(); } - if (strategy.getIgnoreTrailingWhitespaces()) { + if (strategy.isTrailingSpacesIgnored()) { tkn.content.trimTrailingWhitespace(); } @@ -400,7 +400,7 @@ public class CSVParser { for (; ;) { c = in.read(); - if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') { + if (c == '\\' && strategy.isUnicodeEscapesInterpreted() && in.lookAhead() == 'u') { tkn.content.append((char) unicodeEscapeLexer(c)); } else if (c == strategy.getEscape()) { tkn.content.append((char) readEscape(c)); diff --git a/src/main/java/org/apache/commons/csv/CSVPrinter.java b/src/main/java/org/apache/commons/csv/CSVPrinter.java index bb40bafb..99b712bd 100644 --- a/src/main/java/org/apache/commons/csv/CSVPrinter.java +++ b/src/main/java/org/apache/commons/csv/CSVPrinter.java @@ -58,7 +58,7 @@ public class CSVPrinter { * Output a blank line */ public void println() throws IOException { - out.write(strategy.getPrinterNewline()); + out.write(strategy.getLineSeparator()); newLine = true; } diff --git a/src/main/java/org/apache/commons/csv/CSVStrategy.java b/src/main/java/org/apache/commons/csv/CSVStrategy.java index 99c1075d..f79a021b 100644 --- a/src/main/java/org/apache/commons/csv/CSVStrategy.java +++ b/src/main/java/org/apache/commons/csv/CSVStrategy.java @@ -26,17 +26,15 @@ import java.io.Serializable; */ public class CSVStrategy implements 
Cloneable, Serializable { - private char delimiter; - private char encapsulator; - private char commentStart; - private char escape; - private boolean ignoreLeadingWhitespaces; - private boolean ignoreTrailingWhitespaces; - private boolean interpretUnicodeEscapes; - private boolean ignoreEmptyLines; - - // controls for output - private String printerNewline = "\n"; + private char delimiter = ','; + private char encapsulator = '"'; + private char commentStart = COMMENTS_DISABLED; + private char escape = ESCAPE_DISABLED; + private boolean leadingSpacesIgnored = true; + private boolean trailingSpacesIgnored = true; + private boolean unicodeEscapesInterpreted = false; + private boolean emptyLinesIgnored = true; + private String lineSeparator = "\n"; // -2 is used to signal disabled, because it won't be confused with // an EOF signal (-1), and because \ufffe in UTF-16 would be @@ -46,11 +44,22 @@ public class CSVStrategy implements Cloneable, Serializable { public static final char ESCAPE_DISABLED = (char) -2; public static final char ENCAPSULATOR_DISABLED = (char) -2; + /** Standard comma separated format. */ public static final CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true); + + /** Excel file format (using a comma as the value delimiter). */ public static final CSVStrategy EXCEL_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, false, false, false, false); + + /** Tabulation delimited format. */ public static final CSVStrategy TDF_STRATEGY = new CSVStrategy('\t', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, true, false, true); + /** + * Creates a CSVStrategy with the default parameters. 
+ */ + public CSVStrategy() { + } + public CSVStrategy(char delimiter, char encapsulator, char commentStart) { this(delimiter, encapsulator, commentStart, ESCAPE_DISABLED, true, true, false, true); } @@ -62,103 +71,129 @@ public class CSVStrategy implements Cloneable, Serializable { * @param encapsulator a char used as value encapsulation marker * @param commentStart a char used for comment identification * @param escape a char used to escape special characters in values - * @param ignoreLeadingWhitespaces TRUE when leading whitespaces should be ignored - * @param ignoreTrailingWhitespaces TRUE when trailing whitespaces should be ignored - * @param interpretUnicodeEscapes TRUE when unicode escapes should be interpreted - * @param ignoreEmptyLines TRUE when the parser should skip emtpy lines + * @param leadingSpacesIgnored TRUE when leading whitespaces should be ignored + * @param trailingSpacesIgnored TRUE when trailing whitespaces should be ignored + * @param unicodeEscapesInterpreted TRUE when unicode escapes should be interpreted + * @param emptyLinesIgnored TRUE when the parser should skip empty lines */ public CSVStrategy( char delimiter, char encapsulator, char commentStart, char escape, - boolean ignoreLeadingWhitespaces, - boolean ignoreTrailingWhitespaces, - boolean interpretUnicodeEscapes, - boolean ignoreEmptyLines) { + boolean leadingSpacesIgnored, + boolean trailingSpacesIgnored, + boolean unicodeEscapesInterpreted, + boolean emptyLinesIgnored) { this.delimiter = delimiter; this.encapsulator = encapsulator; this.commentStart = commentStart; this.escape = escape; - this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces; - this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces; - this.interpretUnicodeEscapes = interpretUnicodeEscapes; - this.ignoreEmptyLines = ignoreEmptyLines; - } - - public void setDelimiter(char delimiter) { - this.delimiter = delimiter; + this.leadingSpacesIgnored = leadingSpacesIgnored; + this.trailingSpacesIgnored = 
trailingSpacesIgnored; + this.unicodeEscapesInterpreted = unicodeEscapesInterpreted; + this.emptyLinesIgnored = emptyLinesIgnored; } public char getDelimiter() { - return this.delimiter; + return delimiter; } - public void setEncapsulator(char encapsulator) { - this.encapsulator = encapsulator; + public CSVStrategy withDelimiter(char delimiter) { + CSVStrategy strategy = (CSVStrategy) clone(); + strategy.delimiter = delimiter; + return strategy; } public char getEncapsulator() { - return this.encapsulator; + return encapsulator; } - public void setCommentStart(char commentStart) { - this.commentStart = commentStart; + public CSVStrategy withEncapsulator(char encapsulator) { + CSVStrategy strategy = (CSVStrategy) clone(); + strategy.encapsulator = encapsulator; + return strategy; } public char getCommentStart() { - return this.commentStart; + return commentStart; + } + + public CSVStrategy withCommentStart(char commentStart) { + CSVStrategy strategy = (CSVStrategy) clone(); + strategy.commentStart = commentStart; + return strategy; } public boolean isCommentingDisabled() { return this.commentStart == COMMENTS_DISABLED; } - public void setEscape(char escape) { - this.escape = escape; - } - public char getEscape() { - return this.escape; + return escape; } - public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) { - this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces; + public CSVStrategy withEscape(char escape) { + CSVStrategy strategy = (CSVStrategy) clone(); + strategy.escape = escape; + return strategy; } - public boolean getIgnoreLeadingWhitespaces() { - return this.ignoreLeadingWhitespaces; + public boolean isLeadingSpacesIgnored() { + return leadingSpacesIgnored; } - public void setIgnoreTrailingWhitespaces(boolean ignoreTrailingWhitespaces) { - this.ignoreTrailingWhitespaces = ignoreTrailingWhitespaces; + public CSVStrategy withLeadingSpacesIgnored(boolean leadingSpacesIgnored) { + CSVStrategy strategy = (CSVStrategy) clone(); + 
strategy.leadingSpacesIgnored = leadingSpacesIgnored; + return strategy; } - public boolean getIgnoreTrailingWhitespaces() { - return this.ignoreTrailingWhitespaces; + public boolean isTrailingSpacesIgnored() { + return trailingSpacesIgnored; } - public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) { - this.interpretUnicodeEscapes = interpretUnicodeEscapes; + public CSVStrategy withTrailingSpacesIgnored(boolean trailingSpacesIgnored) { + CSVStrategy strategy = (CSVStrategy) clone(); + strategy.trailingSpacesIgnored = trailingSpacesIgnored; + return strategy; } - public boolean getUnicodeEscapeInterpretation() { - return this.interpretUnicodeEscapes; + public boolean isUnicodeEscapesInterpreted() { + return unicodeEscapesInterpreted; } - public boolean getIgnoreEmptyLines() { - return this.ignoreEmptyLines; + public CSVStrategy withUnicodeEscapesInterpreted(boolean unicodeEscapesInterpreted) { + CSVStrategy strategy = (CSVStrategy) clone(); + strategy.unicodeEscapesInterpreted = unicodeEscapesInterpreted; + return strategy; } - public String getPrinterNewline() { - return this.printerNewline; + public boolean isEmptyLinesIgnored() { + return emptyLinesIgnored; } - public Object clone() { + public CSVStrategy withEmptyLinesIgnored(boolean emptyLinesIgnored) { + CSVStrategy strategy = (CSVStrategy) clone(); + strategy.emptyLinesIgnored = emptyLinesIgnored; + return strategy; + } + + public String getLineSeparator() { + return lineSeparator; + } + + public CSVStrategy withLineSeparator(String lineSeparator) { + CSVStrategy strategy = (CSVStrategy) clone(); + strategy.lineSeparator = lineSeparator; + return strategy; + } + + protected Object clone() { try { return super.clone(); } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); // impossible + throw (Error) new InternalError().initCause(e); } } } diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java 
index e77670fe..be03c0e4 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -94,10 +94,8 @@ public class CSVParserTest extends TestCase { * */ String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n"; - CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone(); - // strategy.setIgnoreEmptyLines(false); - strategy.setCommentStart('#'); - + CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY.withCommentStart('#'); + TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy); @@ -123,8 +121,7 @@ public class CSVParserTest extends TestCase { * \,, */ String code = "a,\\,,b\n\\,,"; - CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone(); - strategy.setCommentStart('#'); + CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY.withCommentStart('#'); TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); @@ -520,8 +517,7 @@ public class CSVParserTest extends TestCase { public void testUnicodeEscape() throws IOException { String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063"; - CSVParser parser = new CSVParser(new StringReader(code)); - parser.getStrategy().setUnicodeEscapeInterpretation(true); + CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.DEFAULT_STRATEGY.withUnicodeEscapesInterpreted(true)); String[] data = parser.getLine(); assertEquals(2, data.length); assertEquals("abc", data[0]); diff --git a/src/test/java/org/apache/commons/csv/CSVStrategyTest.java b/src/test/java/org/apache/commons/csv/CSVStrategyTest.java index f95e85dc..abc6c41c 100644 --- a/src/test/java/org/apache/commons/csv/CSVStrategyTest.java +++ b/src/test/java/org/apache/commons/csv/CSVStrategyTest.java @@ -14,76 +14,35 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package org.apache.commons.csv; import junit.framework.TestCase; -/** - * CSVStrategyTest - * - * The test are organized in three different sections: - * The 'setter/getter' section, the lexer section and finally the strategy - * section. In case a test fails, you should follow a top-down approach for - * fixing a potential bug (its likely that the strategy itself fails if the lexer - * has problems...). - */ public class CSVStrategyTest extends TestCase { - // ====================================================== - // getters / setters - // ====================================================== - public void testGetSetCommentStart() { - CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone(); - strategy.setCommentStart('#'); - assertEquals(strategy.getCommentStart(), '#'); - strategy.setCommentStart('!'); - assertEquals(strategy.getCommentStart(), '!'); - } + public void testImmutalibity() { + CSVStrategy strategy1 = new CSVStrategy('!', '!', '!', '!', true, true, true, true); + CSVStrategy strategy2 = strategy1.withDelimiter('?') + .withEncapsulator('?') + .withCommentStart('?') + .withLineSeparator("?") + .withEscape('?') + .withLeadingSpacesIgnored(false) + .withTrailingSpacesIgnored(false) + .withEmptyLinesIgnored(false) + .withUnicodeEscapesInterpreted(false); - public void testGetSetEncapsulator() { - CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone(); - strategy.setEncapsulator('"'); - assertEquals(strategy.getEncapsulator(), '"'); - strategy.setEncapsulator('\''); - assertEquals(strategy.getEncapsulator(), '\''); - } - - public void testGetSetDelimiter() { - CSVStrategy strategy = (CSVStrategy) CSVStrategy.DEFAULT_STRATEGY.clone(); - strategy.setDelimiter(';'); - assertEquals(strategy.getDelimiter(), ';'); - strategy.setDelimiter(','); - assertEquals(strategy.getDelimiter(), ','); - strategy.setDelimiter('\t'); - assertEquals(strategy.getDelimiter(), '\t'); - } - - public void testSetCSVStrategy() { - 
CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY; - // default settings - assertEquals(strategy.getDelimiter(), ','); - assertEquals(strategy.getEncapsulator(), '"'); - assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED); - assertEquals(true, strategy.getIgnoreLeadingWhitespaces()); - assertEquals(false, strategy.getUnicodeEscapeInterpretation()); - assertEquals(true, strategy.getIgnoreEmptyLines()); - // explicit csv settings - assertEquals(strategy.getDelimiter(), ','); - assertEquals(strategy.getEncapsulator(), '"'); - assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED); - assertEquals(true, strategy.getIgnoreLeadingWhitespaces()); - assertEquals(false, strategy.getUnicodeEscapeInterpretation()); - assertEquals(true, strategy.getIgnoreEmptyLines()); - } - - public void testSetExcelStrategy() { - CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY; - assertEquals(strategy.getDelimiter(), ','); - assertEquals(strategy.getEncapsulator(), '"'); - assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED); - assertEquals(false, strategy.getIgnoreLeadingWhitespaces()); - assertEquals(false, strategy.getUnicodeEscapeInterpretation()); - assertEquals(false, strategy.getIgnoreEmptyLines()); + assertNotSame(strategy1.getDelimiter(), strategy2.getDelimiter()); + assertNotSame(strategy1.getEncapsulator(), strategy2.getEncapsulator()); + assertNotSame(strategy1.getCommentStart(), strategy2.getCommentStart()); + assertNotSame(strategy1.getEscape(), strategy2.getEscape()); + assertNotSame(strategy1.getLineSeparator(), strategy2.getLineSeparator()); + + assertNotSame(strategy1.isTrailingSpacesIgnored(), strategy2.isTrailingSpacesIgnored()); + assertNotSame(strategy1.isLeadingSpacesIgnored(), strategy2.isLeadingSpacesIgnored()); + assertNotSame(strategy1.isEmptyLinesIgnored(), strategy2.isEmptyLinesIgnored()); + assertNotSame(strategy1.isUnicodeEscapesInterpreted(), strategy2.isUnicodeEscapesInterpreted()); } }