From eac54a225bc974157e914cf66cfa598171022018 Mon Sep 17 00:00:00 2001 From: Henri Yandell Date: Fri, 5 May 2006 06:24:34 +0000 Subject: [PATCH] Extracted the strategy concept into its own class git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/sandbox/csv/trunk@399987 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/commons/csv/CSVParser.java | 248 ++---------------- .../org/apache/commons/csv/CSVStrategy.java | 87 ++++++ .../org/apache/commons/csv/CSVParserTest.java | 95 ++----- .../apache/commons/csv/CSVStrategyTest.java | 117 +++++++++ 4 files changed, 249 insertions(+), 298 deletions(-) create mode 100644 src/java/org/apache/commons/csv/CSVStrategy.java create mode 100644 src/test/org/apache/commons/csv/CSVStrategyTest.java diff --git a/src/java/org/apache/commons/csv/CSVParser.java b/src/java/org/apache/commons/csv/CSVParser.java index 0b487588..c47f2b9f 100644 --- a/src/java/org/apache/commons/csv/CSVParser.java +++ b/src/java/org/apache/commons/csv/CSVParser.java @@ -65,16 +65,10 @@ public class CSVParser { /** Token with content when end of a line is reached. */ protected static final int TT_EORECORD = 2; - // the csv definition - private char delimiter; - private char encapsulator; - private char commentStart; - private boolean ignoreLeadingWhitespaces; - private boolean interpretUnicodeEscapes; - private boolean ignoreEmptyLines; - // the input stream private ExtendedBufferedReader in; + + private CSVStrategy strategy; /** * Token is an internal token representation. @@ -106,7 +100,7 @@ public class CSVParser { * @param s CSV String to be parsed. * @return parsed String matrix (which is never null) * @throws IOException in case of error - * @see #setCSVStrategy() + * @see #setStrategy(CSVStrategy) */ public static String[][] parse(String s) throws IOException { if (s == null) { @@ -130,7 +124,7 @@ public class CSVParser { * @param s CSV String to be parsed. 
* @return parsed String vector (which is never null) * @throws IOException in case of error - * @see #setCSVStrategy() + * @see #setStrategy(CSVStrategy) */ public static String[] parseLine(String s) throws IOException { if (s == null) { @@ -151,7 +145,7 @@ public class CSVParser { * Default strategy for the parser follows the default CSV Strategy. * * @param input an InputStream containing "csv-formatted" stream - * @see #setCSVStrategy() + * @see #setStrategy(CSVStrategy) */ public CSVParser(InputStream input) { this(new InputStreamReader(input)); @@ -161,7 +155,7 @@ public class CSVParser { * Default strategy for the parser follows the default CSV Strategy. * * @param input a Reader based on "csv-formatted" input - * @see #setCSVStrategy() + * @see #setStrategy(CSVStrategy) */ public CSVParser(Reader input) { // note: must match default-CSV-strategy !! @@ -172,7 +166,7 @@ public class CSVParser { * Customized value delimiter parser. * * The parser follows the default CSV strategy as defined in - * {@link #setCSVStrategy()} except for the delimiter setting. + * {@link CSVStrategy#DEFAULT_STRATEGY} except for the delimiter setting. 
* * @param input a Reader based on "csv-formatted" input * @param delimiter a Char used for value separation @@ -193,18 +187,9 @@ public class CSVParser { * @param encapsulator a Char used as value encapsulation marker * @param commentStart a Char used for comment identification */ - public CSVParser( - Reader input, - char delimiter, - char encapsulator, - char commentStart) { + public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) { this.in = new ExtendedBufferedReader(input); - this.setDelimiter(delimiter); - this.setEncapsulator(encapsulator); - this.setCommentStart(commentStart); - this.setIgnoreLeadingWhitespaces(true); - this.setUnicodeEscapeInterpretation(false); - this.setIgnoreEmptyLines(true); + this.strategy = new CSVStrategy(delimiter, encapsulator, commentStart); } // ====================================================== @@ -350,7 +335,7 @@ public class CSVParser { c = in.readAgain(); // empty line detection: eol AND (last char was EOL or beginning) - while (ignoreEmptyLines && eol + while (strategy.getIgnoreEmptyLines() && eol && (lastChar == '\n' || lastChar == ExtendedBufferedReader.UNDEFINED) && !isEndOfFile(lastChar)) { @@ -367,7 +352,7 @@ public class CSVParser { } // did we reached eof during the last iteration already ? 
TT_EOF - if (isEndOfFile(lastChar) || (lastChar != delimiter && isEndOfFile(c))) { + if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) { tkn.type = TT_EOF; return tkn; } @@ -381,11 +366,11 @@ public class CSVParser { eol = isEndOfLine(c); } // ok, start of token reached: comment, encapsulated, or token - if (c == commentStart) { + if (c == strategy.getCommentStart()) { // ignore everything till end of line and continue (incr linecount) in.readLine(); tkn = nextToken(); - } else if (c == delimiter) { + } else if (c == strategy.getDelimiter()) { // empty token return TT_TOKEN("") tkn.type = TT_TOKEN; tkn.isReady = true; @@ -394,7 +379,7 @@ public class CSVParser { tkn.content.append(""); tkn.type = TT_EORECORD; tkn.isReady = true; - } else if (c == encapsulator) { + } else if (c == strategy.getEncapsulator()) { // consume encapsulated token encapsulatedTokenLexer(tkn, c); } else if (isEndOfFile(c)) { @@ -405,7 +390,7 @@ public class CSVParser { } else { // next token must be a simple token // add removed blanks when not ignoring whitespace chars... 
- if (!this.ignoreLeadingWhitespaces) { + if (!strategy.getIgnoreLeadingWhitespaces()) { tkn.content.append(wsBuf.toString()); } simpleTokenLexer(tkn, c); @@ -443,11 +428,11 @@ public class CSVParser { // end of file tkn.type = TT_EOF; tkn.isReady = true; - } else if (c == delimiter) { + } else if (c == strategy.getDelimiter()) { // end of token tkn.type = TT_TOKEN; tkn.isReady = true; - } else if (c == '\\' && interpretUnicodeEscapes && in.lookAhead() == 'u') { + } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') { // interpret unicode escaped chars (like \u0070 -> p) tkn.content.append((char) unicodeEscapeLexer(c)); } else if (isWhitespace(c)) { @@ -493,9 +478,9 @@ public class CSVParser { // assert c == delimiter; c = in.read(); while (!tkn.isReady) { - if (c == encapsulator || c == '\\') { + if (c == strategy.getEncapsulator() || c == '\\') { // check lookahead - if (in.lookAhead() == encapsulator) { + if (in.lookAhead() == strategy.getEncapsulator()) { // double or escaped encapsulator -> add single encapsulator to token c = in.read(); tkn.content.append((char) c); @@ -506,7 +491,7 @@ public class CSVParser { c = in.read(); tkn.content.append((char) c); } else if ( - interpretUnicodeEscapes + strategy.getUnicodeEscapeInterpretation() && c == '\\' && in.lookAhead() == 'u') { // interpret unicode escaped chars (like \u0070 -> p) @@ -518,7 +503,7 @@ public class CSVParser { // token finish mark (encapsulator) reached: ignore whitespace till delimiter while (!tkn.isReady) { int n = in.lookAhead(); - if (n == delimiter) { + if (n == strategy.getDelimiter()) { tkn.type = TT_TOKEN; tkn.isReady = true; } else if (isEndOfFile(n)) { @@ -589,201 +574,26 @@ public class CSVParser { } // ====================================================== - // strategy utilities + // strategies // ====================================================== /** - * Sets the "Default CSV" settings. 
- * - * The default csv settings are relatively restrictive but implement - * something like the "least-common-basis" of CSV: - * - * @return current instance of CSVParser to allow chained method calls - */ - public CSVParser setCSVStrategy() { - setStrategy(',', '"', (char) 0, true, false, true); - return this; - } - - /** - * Sets the "Excel CSV" settings. There are companies out there which - * interpret "C" as an abbreviation for "Semicolon". For these companies the - * following settings might be appropriate: - * + * Sets the specified CSV Strategy * * @return current instance of CSVParser to allow chained method calls */ - public CSVParser setExcelStrategy() { - setStrategy(';', '"', (char) 0, false, false, false); + public CSVParser setStrategy(CSVStrategy strategy) { + this.strategy = strategy; return this; } /** - * Customized CSV strategy setter. + * Obtain the specified CSV Strategy * - * @param delimiter a Char used for value separation - * @param encapsulator a Char used as value encapsulation marker - * @param commentStart a Char used for comment identification - * @param ignoreLeadingWhitespace TRUE when leading whitespaces should be - * ignored - * @param interpretUnicodeEscapes TRUE when unicode escapes should be - * interpreted - * @param ignoreEmptyLines TRUE when the parser should skip emtpy lines - * @return current instance of CSVParser to allow chained method calls + * @return strategy currently being used */ - public CSVParser setStrategy( - char delimiter, - char encapsulator, - char commentStart, - boolean ignoreLeadingWhitespace, - boolean interpretUnicodeEscapes, - boolean ignoreEmptyLines) { - this.setDelimiter(delimiter); - this.setEncapsulator(encapsulator); - this.setCommentStart(commentStart); - this.setIgnoreLeadingWhitespaces(ignoreLeadingWhitespace); - this.setUnicodeEscapeInterpretation(interpretUnicodeEscapes); - this.setIgnoreEmptyLines(ignoreEmptyLines); - return this; - } - - /** - * Set the desired delimiter. 
- * - * @param c a Char used for value separation - * @return current instance of CSVParser to allow chained method calls - */ - public CSVParser setDelimiter(char c) { - this.delimiter = c; - return this; - } - - /** - * Gets the delimiter. - * - * @return the delimiter character - */ - public char getDelimiter() { - return this.delimiter; - } - - /** - * Set the desired encapsulator. - * - * @param c a Char used as value encapsulation marker - * @return current instance of CSVParser to allow chained method calls - */ - public CSVParser setEncapsulator(char c) { - this.encapsulator = c; - return this; - } - - /** - * Gets the encapsulator character. - * - * @return the encapsulator marker - */ - public char getEncapsulator() { - return this.encapsulator; - } - - /** - * Set the desired comment start character. - * - * @param c a Char used for comment identification - * @return current instance of CSVParser to allow chained method calls - */ - public CSVParser setCommentStart(char c) { - this.commentStart = c; - return this; - } - - /** - * Gets the comment identifier. - * - * @return the comment identifier character - */ - public char getCommentStart() { - return this.commentStart; - } - - /** - * Enables unicode escape interpretation. - * - * @param b TRUE when interpretation should be enabled - * @return current instance of CSVParser to allow chained method calls - */ - public CSVParser setUnicodeEscapeInterpretation(boolean b) { - this.interpretUnicodeEscapes = b; - return this; - } - - /** - * Shows wether unicode interpretation is enabled. - * - * @return TRUE when unicode interpretation is enabled - */ - public boolean getUnicodeEscapeInterpretation() { - return this.interpretUnicodeEscapes; - } - - /** - * Sets the ignore-leading-whitespaces behaviour. - * - * Should the lexer ignore leading whitespaces when parsing non - * encapsulated tokens. 
- * - * @param b TRUE when leading whitespaces should be ignored - * @return current instance of CSVParser to allow chained method calls - */ - public CSVParser setIgnoreLeadingWhitespaces(boolean b) { - this.ignoreLeadingWhitespaces = b; - return this; - } - - /** - * Shows whether unicode interpretation is enabled. - * - * @return TRUE when unicode interpretation is enabled - */ - public boolean getIgnoreLeadingWhitespaces() { - return this.ignoreLeadingWhitespaces; - } - - /** - * Sets the ignore-empty-line behaviour. - * - * When set to 'true' empty lines in the input will be ignored. - * - * @param b TRUE when empty lines in the input should be ignored - * @return current instance of CSVParser to allow chained method calls - */ - public CSVParser setIgnoreEmptyLines(boolean b) { - this.ignoreEmptyLines = b; - return this; - } - - /** - * Shows whether empty lines in the input are ignored. - * - * @return TRUE when empty lines in the input are ignored - */ - public boolean getIgnoreEmptyLines() { - return this.ignoreEmptyLines; + public CSVStrategy getStrategy() { + return this.strategy; } // ====================================================== diff --git a/src/java/org/apache/commons/csv/CSVStrategy.java b/src/java/org/apache/commons/csv/CSVStrategy.java new file mode 100644 index 00000000..b8d792d4 --- /dev/null +++ b/src/java/org/apache/commons/csv/CSVStrategy.java @@ -0,0 +1,87 @@ +/* + * Copyright 2005 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.csv; + +/** + * CSVStrategy + * + * Represents the strategy for a CSV. + */ +public class CSVStrategy { + + private char delimiter; + private char encapsulator; + private char commentStart; + private boolean ignoreLeadingWhitespaces; + private boolean interpretUnicodeEscapes; + private boolean ignoreEmptyLines; + /** Predefined strategies. These instances are shared and mutable: never call their setters, build a private CSVStrategy instead. */ + public static final CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', (char) 0, true, false, true); + public static final CSVStrategy EXCEL_STRATEGY = new CSVStrategy(';', '"', (char) 0, false, false, false); + + /** Creates a strategy with the default flags: ignore leading whitespaces, no unicode escape interpretation, ignore empty lines. */ + public CSVStrategy(char delimiter, char encapsulator, char commentStart) { + this(delimiter, encapsulator, commentStart, true, false, true); + } + + /** + * Creates a customized CSV strategy. + * + * @param delimiter a Char used for value separation + * @param encapsulator a Char used as value encapsulation marker + * @param commentStart a Char used for comment identification + * @param ignoreLeadingWhitespace TRUE when leading whitespaces should be + * ignored + * @param interpretUnicodeEscapes TRUE when unicode escapes should be + * interpreted + * @param ignoreEmptyLines TRUE when the parser should skip empty lines + * @see #CSVStrategy(char, char, char) + */ + public CSVStrategy( + char delimiter, + char encapsulator, + char commentStart, + boolean ignoreLeadingWhitespace, + boolean interpretUnicodeEscapes, + boolean ignoreEmptyLines) + { + this.delimiter = delimiter; + this.encapsulator = encapsulator; + this.commentStart = commentStart; + this.ignoreLeadingWhitespaces = ignoreLeadingWhitespace; + this.interpretUnicodeEscapes = interpretUnicodeEscapes; + this.ignoreEmptyLines = ignoreEmptyLines; + } + + public void setDelimiter(char delimiter) { 
this.encapsulator = encapsulator; } + public char getEncapsulator() { return this.encapsulator; } + + public void setCommentStart(char commentStart) { this.commentStart = commentStart; } + public char getCommentStart() { return this.commentStart; } + + public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) { this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces; } + public boolean getIgnoreLeadingWhitespaces() { return this.ignoreLeadingWhitespaces; } + + public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) { this.interpretUnicodeEscapes = interpretUnicodeEscapes; } + public boolean getUnicodeEscapeInterpretation() { return this.interpretUnicodeEscapes; } + + public void setIgnoreEmptyLines(boolean ignoreEmptyLines) { this.ignoreEmptyLines = ignoreEmptyLines; } + public boolean getIgnoreEmptyLines() { return this.ignoreEmptyLines; } + +} diff --git a/src/test/org/apache/commons/csv/CSVParserTest.java b/src/test/org/apache/commons/csv/CSVParserTest.java index d53e79a4..93b840a1 100644 --- a/src/test/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/org/apache/commons/csv/CSVParserTest.java @@ -77,67 +77,6 @@ public class CSVParserTest extends TestCase { } - // ====================================================== - // getters / setters - // ====================================================== - public void testGetSetCommentStart() { - CSVParser parser = new CSVParser(new StringReader("hello world")); - parser.setCommentStart('#'); - assertEquals(parser.getCommentStart(), '#'); - parser.setCommentStart('!'); - assertEquals(parser.getCommentStart(), '!'); - } - - public void testGetSetEncapsulator() { - CSVParser parser = new CSVParser(new StringReader("hello world")); - parser.setEncapsulator('"'); - assertEquals(parser.getEncapsulator(), '"'); - parser.setEncapsulator('\''); - assertEquals(parser.getEncapsulator(), '\''); - } - - public void testGetSetDelimiter() { - CSVParser parser = new CSVParser(new 
StringReader("hello world")); - parser.setDelimiter(';'); - assertEquals(parser.getDelimiter(), ';'); - parser.setDelimiter(','); - assertEquals(parser.getDelimiter(), ','); - parser.setDelimiter('\t'); - assertEquals(parser.getDelimiter(), '\t'); - } - - public void testSetCSVStrategy() { - CSVParser parser = new CSVParser(new StringReader("hello world")); - // default settings - assertEquals(parser.getDelimiter(), ','); - assertEquals(parser.getEncapsulator(), '"'); - assertEquals(parser.getCommentStart(), '\0'); - assertEquals(true, parser.getIgnoreLeadingWhitespaces()); - assertEquals(false, parser.getUnicodeEscapeInterpretation()); - assertEquals(true, parser.getIgnoreEmptyLines()); - // explicit csv settings - parser.setCSVStrategy(); - assertEquals(parser.getDelimiter(), ','); - assertEquals(parser.getEncapsulator(), '"'); - assertEquals(parser.getCommentStart(), '\0'); - assertEquals(true, parser.getIgnoreLeadingWhitespaces()); - assertEquals(false, parser.getUnicodeEscapeInterpretation()); - assertEquals(true, parser.getIgnoreEmptyLines()); - } - - public void testSetExcelStrategy() { - CSVParser parser = new CSVParser(new StringReader("hello world")); - // explicit Excel settings - parser.setExcelStrategy(); - assertEquals(parser.getDelimiter(), ';'); - assertEquals(parser.getEncapsulator(), '"'); - assertEquals(parser.getCommentStart(), '\0'); - assertEquals(false, parser.getIgnoreLeadingWhitespaces()); - assertEquals(false, parser.getUnicodeEscapeInterpretation()); - assertEquals(false, parser.getIgnoreEmptyLines()); - } - - // ====================================================== // lexer tests // ====================================================== @@ -146,7 +85,7 @@ public class CSVParserTest extends TestCase { public void testNextToken1() throws IOException { String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,"; TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setCSVStrategy(); + 
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); System.out.println("---------\n" + code + "\n-------------"); assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken()); @@ -171,9 +110,9 @@ public class CSVParserTest extends TestCase { */ String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n"; TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setIgnoreEmptyLines(false); - parser.setCSVStrategy(); - parser.setCommentStart('#'); + // private instance: calling setters on the shared DEFAULT_STRATEGY would leak into other tests + // (same flags as the default strategy, but with '#' as comment start) + parser.setStrategy(new CSVStrategy(',', '"', '#')); System.out.println("---------\n" + code + "\n-------------"); assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken()); @@ -198,8 +137,8 @@ public class CSVParserTest extends TestCase { */ String code = "a,\\,,b\n\\,,"; TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setCSVStrategy(); - parser.setCommentStart('#'); + // private instance so that the shared DEFAULT_STRATEGY is never mutated + parser.setStrategy(new CSVStrategy(',', '"', '#')); System.out.println("---------\n" + code + "\n-------------"); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); // an unquoted single backslash is not an escape char @@ -222,7 +161,7 @@ public class CSVParserTest extends TestCase { String code = "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b"; TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setCSVStrategy(); + parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); System.out.println("---------\n" + code + "\n-------------"); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken()); @@ -246,7 +185,7 @@ public class CSVParserTest extends TestCase { + ",\"\\,\"" + ",\"\"\"\""; TestCSVParser parser = new TestCSVParser(new 
StringReader(code)); - parser.setCSVStrategy(); + parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); System.out.println("---------\n" + code + "\n-------------"); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken()); @@ -269,9 +208,7 @@ public class CSVParserTest extends TestCase { */ String code = "a;'b and \\' more\n'\n!comment;;;;\n;;"; TestCSVParser parser = new TestCSVParser(new StringReader(code)); - parser.setDelimiter(';'); - parser.setEncapsulator('\''); - parser.setCommentStart('!'); + parser.setStrategy( new CSVStrategy(';', '\'', '!') ); System.out.println("---------\n" + code + "\n-------------"); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); assertEquals( @@ -344,7 +281,7 @@ public class CSVParserTest extends TestCase { {"\"hello\"", " \"world\"", "abc\ndef", ""} }; CSVParser parser = new CSVParser(new StringReader(code)); - parser.setExcelStrategy(); + parser.setStrategy(CSVStrategy.EXCEL_STRATEGY); System.out.println("---------\n" + code + "\n-------------"); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); @@ -364,7 +301,7 @@ public class CSVParserTest extends TestCase { {"world", ""} }; CSVParser parser = new CSVParser(new StringReader(code)); - parser.setExcelStrategy(); + parser.setStrategy(CSVStrategy.EXCEL_STRATEGY); System.out.println("---------\n" + code + "\n-------------"); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); @@ -397,7 +334,7 @@ public class CSVParserTest extends TestCase { for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { code = codes[codeIndex]; CSVParser parser = new CSVParser(new StringReader(code)); - parser.setExcelStrategy(); + parser.setStrategy(CSVStrategy.EXCEL_STRATEGY); System.out.println("---------\n" + code + "\n-------------"); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); @@ -430,7 +367,7 @@ public class 
CSVParserTest extends TestCase { for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { code = codes[codeIndex]; CSVParser parser = new CSVParser(new StringReader(code)); - parser.setCSVStrategy(); + parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); System.out.println("---------\n" + code + "\n-------------"); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); @@ -460,7 +397,7 @@ public class CSVParserTest extends TestCase { for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { code = codes[codeIndex]; CSVParser parser = new CSVParser(new StringReader(code)); - parser.setExcelStrategy(); + parser.setStrategy(CSVStrategy.EXCEL_STRATEGY); System.out.println("---------\n" + code + "\n-------------"); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); @@ -488,7 +425,7 @@ public class CSVParserTest extends TestCase { for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { code = codes[codeIndex]; CSVParser parser = new CSVParser(new StringReader(code)); - parser.setCSVStrategy(); + parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); System.out.println("---------\n" + code + "\n-------------"); String[][] tmp = parser.getAllValues(); assertEquals(res.length, tmp.length); @@ -663,7 +600,7 @@ public class CSVParserTest extends TestCase { String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063"; CSVParser parser = new CSVParser(new StringReader(code)); System.out.println("---------\n" + code + "\n-------------"); - parser.setUnicodeEscapeInterpretation(true); + parser.getStrategy().setUnicodeEscapeInterpretation(true); String[] data = parser.getLine(); assertEquals(2, data.length); assertEquals("abc", data[0]); diff --git a/src/test/org/apache/commons/csv/CSVStrategyTest.java b/src/test/org/apache/commons/csv/CSVStrategyTest.java new file mode 100644 index 00000000..142495cf --- /dev/null +++ b/src/test/org/apache/commons/csv/CSVStrategyTest.java @@ -0,0 +1,117 @@ +/* + * Copyright 2005 
The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.csv; + +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.util.Arrays; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * CSVStrategyTest + * + * The tests are organized in two sections: the 'setter/getter' section, + * which exercises the individual {@link CSVStrategy} properties, and the + * strategy section, which checks the settings of a freshly constructed + * {@link CSVParser} and of the predefined {@link CSVStrategy#EXCEL_STRATEGY}. + * There are no lexer tests here; those live in CSVParserTest. + */ +public class CSVStrategyTest extends TestCase { + + /** + * Constructor for JUnit. + * @param name Name to be used in JUnit Test Environment + */ + public CSVStrategyTest(String name) { + super(name); + } + + /** + * Returns a Test suite for JUnit. 
+ * @return Test suite for JUnit + */ + public static Test suite() { + return new TestSuite(CSVStrategyTest.class); + } + + + // ====================================================== + // getters / setters + // ====================================================== + public void testGetSetCommentStart() { + CSVParser parser = new CSVParser(new StringReader("hello world")); + CSVStrategy strategy = parser.getStrategy(); + strategy.setCommentStart('#'); + assertEquals(strategy.getCommentStart(), '#'); + strategy.setCommentStart('!'); + assertEquals(strategy.getCommentStart(), '!'); + } + + public void testGetSetEncapsulator() { + CSVParser parser = new CSVParser(new StringReader("hello world")); + CSVStrategy strategy = parser.getStrategy(); + strategy.setEncapsulator('"'); + assertEquals(strategy.getEncapsulator(), '"'); + strategy.setEncapsulator('\''); + assertEquals(strategy.getEncapsulator(), '\''); + } + + public void testGetSetDelimiter() { + CSVParser parser = new CSVParser(new StringReader("hello world")); + CSVStrategy strategy = parser.getStrategy(); + strategy.setDelimiter(';'); + assertEquals(strategy.getDelimiter(), ';'); + strategy.setDelimiter(','); + assertEquals(strategy.getDelimiter(), ','); + strategy.setDelimiter('\t'); + assertEquals(strategy.getDelimiter(), '\t'); + } + + public void testSetCSVStrategy() { + CSVParser parser = new CSVParser(new StringReader("hello world")); + CSVStrategy strategy = parser.getStrategy(); + // default settings + assertEquals(strategy.getDelimiter(), ','); + assertEquals(strategy.getEncapsulator(), '"'); + assertEquals(strategy.getCommentStart(), '\0'); + assertEquals(true, strategy.getIgnoreLeadingWhitespaces()); + assertEquals(false, strategy.getUnicodeEscapeInterpretation()); + assertEquals(true, strategy.getIgnoreEmptyLines()); + // explicit csv settings + parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY); + assertEquals(strategy.getDelimiter(), ','); + assertEquals(strategy.getEncapsulator(), '"'); + 
assertEquals(strategy.getCommentStart(), '\0'); + assertEquals(true, strategy.getIgnoreLeadingWhitespaces()); + assertEquals(false, strategy.getUnicodeEscapeInterpretation()); + assertEquals(true, strategy.getIgnoreEmptyLines()); + } + + public void testSetExcelStrategy() { + CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY; + assertEquals(strategy.getDelimiter(), ';'); + assertEquals(strategy.getEncapsulator(), '"'); + assertEquals(strategy.getCommentStart(), '\0'); + assertEquals(false, strategy.getIgnoreLeadingWhitespaces()); + assertEquals(false, strategy.getUnicodeEscapeInterpretation()); + assertEquals(false, strategy.getIgnoreEmptyLines()); + } + +}