Extracted the strategy concept into its own class

git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/sandbox/csv/trunk@399987 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Henri Yandell 2006-05-05 06:24:34 +00:00
parent f047581f95
commit eac54a225b
4 changed files with 249 additions and 298 deletions

View File

@ -65,16 +65,10 @@ public class CSVParser {
/** Token with content when end of a line is reached. */
protected static final int TT_EORECORD = 2;
// the csv definition
private char delimiter;
private char encapsulator;
private char commentStart;
private boolean ignoreLeadingWhitespaces;
private boolean interpretUnicodeEscapes;
private boolean ignoreEmptyLines;
// the input stream
private ExtendedBufferedReader in;
private CSVStrategy strategy;
/**
* Token is an internal token representation.
@ -106,7 +100,7 @@ public class CSVParser {
* @param s CSV String to be parsed.
* @return parsed String matrix (which is never null)
* @throws IOException in case of error
* @see #setCSVStrategy()
* @see #setStrategy(CSVStrategy)
*/
public static String[][] parse(String s) throws IOException {
if (s == null) {
@ -130,7 +124,7 @@ public class CSVParser {
* @param s CSV String to be parsed.
* @return parsed String vector (which is never null)
* @throws IOException in case of error
* @see #setCSVStrategy()
* @see #setStrategy(CSVStrategy)
*/
public static String[] parseLine(String s) throws IOException {
if (s == null) {
@ -151,7 +145,7 @@ public class CSVParser {
* Default strategy for the parser follows the default CSV Strategy.
*
* @param input an InputStream containing "csv-formatted" stream
* @see #setCSVStrategy()
* @see #setStrategy(CSVStrategy)
*/
public CSVParser(InputStream input) {
this(new InputStreamReader(input));
@ -161,7 +155,7 @@ public class CSVParser {
* Default strategy for the parser follows the default CSV Strategy.
*
* @param input a Reader based on "csv-formatted" input
* @see #setCSVStrategy()
* @see #setStrategy(CSVStrategy)
*/
public CSVParser(Reader input) {
// note: must match default-CSV-strategy !!
@ -172,7 +166,7 @@ public class CSVParser {
* Customized value delimiter parser.
*
* The parser follows the default CSV strategy as defined in
* {@link #setCSVStrategy()} except for the delimiter setting.
* {@link #setStrategy(CSVStrategy)} except for the delimiter setting.
*
* @param input a Reader based on "csv-formatted" input
* @param delimiter a Char used for value separation
@ -193,18 +187,9 @@ public class CSVParser {
* @param encapsulator a Char used as value encapsulation marker
* @param commentStart a Char used for comment identification
*/
public CSVParser(
Reader input,
char delimiter,
char encapsulator,
char commentStart) {
public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) {
this.in = new ExtendedBufferedReader(input);
this.setDelimiter(delimiter);
this.setEncapsulator(encapsulator);
this.setCommentStart(commentStart);
this.setIgnoreLeadingWhitespaces(true);
this.setUnicodeEscapeInterpretation(false);
this.setIgnoreEmptyLines(true);
this.strategy = new CSVStrategy(delimiter, encapsulator, commentStart);
}
// ======================================================
@ -350,7 +335,7 @@ public class CSVParser {
c = in.readAgain();
// empty line detection: eol AND (last char was EOL or beginning)
while (ignoreEmptyLines && eol
while (strategy.getIgnoreEmptyLines() && eol
&& (lastChar == '\n'
|| lastChar == ExtendedBufferedReader.UNDEFINED)
&& !isEndOfFile(lastChar)) {
@ -367,7 +352,7 @@ public class CSVParser {
}
// did we already reach EOF during the last iteration? -> TT_EOF
if (isEndOfFile(lastChar) || (lastChar != delimiter && isEndOfFile(c))) {
if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) {
tkn.type = TT_EOF;
return tkn;
}
@ -381,11 +366,11 @@ public class CSVParser {
eol = isEndOfLine(c);
}
// ok, start of token reached: comment, encapsulated, or token
if (c == commentStart) {
if (c == strategy.getCommentStart()) {
// ignore everything till end of line and continue (incr linecount)
in.readLine();
tkn = nextToken();
} else if (c == delimiter) {
} else if (c == strategy.getDelimiter()) {
// empty token return TT_TOKEN("")
tkn.type = TT_TOKEN;
tkn.isReady = true;
@ -394,7 +379,7 @@ public class CSVParser {
tkn.content.append("");
tkn.type = TT_EORECORD;
tkn.isReady = true;
} else if (c == encapsulator) {
} else if (c == strategy.getEncapsulator()) {
// consume encapsulated token
encapsulatedTokenLexer(tkn, c);
} else if (isEndOfFile(c)) {
@ -405,7 +390,7 @@ public class CSVParser {
} else {
// next token must be a simple token
// add removed blanks when not ignoring whitespace chars...
if (!this.ignoreLeadingWhitespaces) {
if (!strategy.getIgnoreLeadingWhitespaces()) {
tkn.content.append(wsBuf.toString());
}
simpleTokenLexer(tkn, c);
@ -443,11 +428,11 @@ public class CSVParser {
// end of file
tkn.type = TT_EOF;
tkn.isReady = true;
} else if (c == delimiter) {
} else if (c == strategy.getDelimiter()) {
// end of token
tkn.type = TT_TOKEN;
tkn.isReady = true;
} else if (c == '\\' && interpretUnicodeEscapes && in.lookAhead() == 'u') {
} else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
// interpret unicode escaped chars (like \u0070 -> p)
tkn.content.append((char) unicodeEscapeLexer(c));
} else if (isWhitespace(c)) {
@ -493,9 +478,9 @@ public class CSVParser {
// assert c == delimiter;
c = in.read();
while (!tkn.isReady) {
if (c == encapsulator || c == '\\') {
if (c == strategy.getEncapsulator() || c == '\\') {
// check lookahead
if (in.lookAhead() == encapsulator) {
if (in.lookAhead() == strategy.getEncapsulator()) {
// double or escaped encapsulator -> add single encapsulator to token
c = in.read();
tkn.content.append((char) c);
@ -506,7 +491,7 @@ public class CSVParser {
c = in.read();
tkn.content.append((char) c);
} else if (
interpretUnicodeEscapes
strategy.getUnicodeEscapeInterpretation()
&& c == '\\'
&& in.lookAhead() == 'u') {
// interpret unicode escaped chars (like \u0070 -> p)
@ -518,7 +503,7 @@ public class CSVParser {
// token finish mark (encapsulator) reached: ignore whitespace till delimiter
while (!tkn.isReady) {
int n = in.lookAhead();
if (n == delimiter) {
if (n == strategy.getDelimiter()) {
tkn.type = TT_TOKEN;
tkn.isReady = true;
} else if (isEndOfFile(n)) {
@ -589,201 +574,26 @@ public class CSVParser {
}
// ======================================================
// strategy utilities
// strategies
// ======================================================
/**
* Sets the "Default CSV" settings.
*
* The default csv settings are relatively restrictive but implement
* something like the "least-common-basis" of CSV:
* <ul>
* <li> Delimiter of values is comma ',' (as the C in "CSV") </li>
* <li> Complex values encapsulated by '"' </li>
* <li> Comments are not supported </li>
* <li> Leading whitespaces are ignored </li>
* <li> Unicode escapes are not interpreted </li>
* <li> empty lines are skiped </li>
* </ul>
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setCSVStrategy() {
setStrategy(',', '"', (char) 0, true, false, true);
return this;
}
/**
* Sets the "Excel CSV" settings. There are companies out there which
* interpret "C" as an abbreviation for "Semicolon". For these companies the
* following settings might be appropriate:
* <ul>
* <li> Delimiter of values is semicolon ';' </li>
* <li> Complex values encapsulated by '"' </li>
* <li> Comments are not supported </li>
* <li> Leading whitespaces are not ignored </li>
* <li> Unicode escapes are not interpreted </li>
* <li> empty lines are not skiped </li>
* </ul>
* Sets the specified CSV Strategy
*
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setExcelStrategy() {
setStrategy(';', '"', (char) 0, false, false, false);
public CSVParser setStrategy(CSVStrategy strategy) {
this.strategy = strategy;
return this;
}
/**
* Customized CSV strategy setter.
* Obtain the specified CSV Strategy
*
* @param delimiter a Char used for value separation
* @param encapsulator a Char used as value encapsulation marker
* @param commentStart a Char used for comment identification
* @param ignoreLeadingWhitespace TRUE when leading whitespaces should be
* ignored
* @param interpretUnicodeEscapes TRUE when unicode escapes should be
* interpreted
* @param ignoreEmptyLines TRUE when the parser should skip emtpy lines
* @return current instance of CSVParser to allow chained method calls
* @return strategy currently being used
*/
public CSVParser setStrategy(
char delimiter,
char encapsulator,
char commentStart,
boolean ignoreLeadingWhitespace,
boolean interpretUnicodeEscapes,
boolean ignoreEmptyLines) {
this.setDelimiter(delimiter);
this.setEncapsulator(encapsulator);
this.setCommentStart(commentStart);
this.setIgnoreLeadingWhitespaces(ignoreLeadingWhitespace);
this.setUnicodeEscapeInterpretation(interpretUnicodeEscapes);
this.setIgnoreEmptyLines(ignoreEmptyLines);
return this;
}
/**
* Set the desired delimiter.
*
* @param c a Char used for value separation
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setDelimiter(char c) {
this.delimiter = c;
return this;
}
/**
* Gets the delimiter.
*
* @return the delimiter character
*/
public char getDelimiter() {
return this.delimiter;
}
/**
* Set the desired encapsulator.
*
* @param c a Char used as value encapsulation marker
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setEncapsulator(char c) {
this.encapsulator = c;
return this;
}
/**
* Gets the encapsulator character.
*
* @return the encapsulator marker
*/
public char getEncapsulator() {
return this.encapsulator;
}
/**
* Set the desired comment start character.
*
* @param c a Char used for comment identification
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setCommentStart(char c) {
this.commentStart = c;
return this;
}
/**
* Gets the comment identifier.
*
* @return the comment identifier character
*/
public char getCommentStart() {
return this.commentStart;
}
/**
* Enables unicode escape interpretation.
*
* @param b TRUE when interpretation should be enabled
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setUnicodeEscapeInterpretation(boolean b) {
this.interpretUnicodeEscapes = b;
return this;
}
/**
* Shows wether unicode interpretation is enabled.
*
* @return TRUE when unicode interpretation is enabled
*/
public boolean getUnicodeEscapeInterpretation() {
return this.interpretUnicodeEscapes;
}
/**
* Sets the ignore-leading-whitespaces behaviour.
*
* Should the lexer ignore leading whitespaces when parsing non
* encapsulated tokens.
*
* @param b TRUE when leading whitespaces should be ignored
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setIgnoreLeadingWhitespaces(boolean b) {
this.ignoreLeadingWhitespaces = b;
return this;
}
/**
* Shows whether unicode interpretation is enabled.
*
* @return TRUE when unicode interpretation is enabled
*/
public boolean getIgnoreLeadingWhitespaces() {
return this.ignoreLeadingWhitespaces;
}
/**
* Sets the ignore-empty-line behaviour.
*
* When set to 'true' empty lines in the input will be ignored.
*
* @param b TRUE when empty lines in the input should be ignored
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setIgnoreEmptyLines(boolean b) {
this.ignoreEmptyLines = b;
return this;
}
/**
* Shows whether empty lines in the input are ignored.
*
* @return TRUE when empty lines in the input are ignored
*/
public boolean getIgnoreEmptyLines() {
return this.ignoreEmptyLines;
public CSVStrategy getStrategy() {
return this.strategy;
}
// ======================================================

View File

@ -0,0 +1,87 @@
/*
* Copyright 2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
/**
 * CSVStrategy
 *
 * Describes the dialect of a CSV source: which characters separate values,
 * encapsulate values and start comments, and whether leading whitespace,
 * unicode escapes and empty lines get special treatment.
 *
 * Instances are mutable and not synchronized. In particular the shared
 * {@link #DEFAULT_STRATEGY} and {@link #EXCEL_STRATEGY} instances can be
 * changed through their setters, and such a change is visible to every user
 * of the constant.
 */
public class CSVStrategy {

    private char delimiter;
    private char encapsulator;
    private char commentStart;
    private boolean ignoreLeadingWhitespaces;
    private boolean interpretUnicodeEscapes;
    private boolean ignoreEmptyLines;

    /**
     * Default CSV settings: delimiter ',', encapsulator '"', comment start
     * set to the NUL character, leading whitespaces ignored, unicode escapes
     * not interpreted, empty lines ignored.
     */
    public static final CSVStrategy DEFAULT_STRATEGY =
            new CSVStrategy(',', '"', (char) 0, true, false, true);

    /**
     * Excel-style settings: delimiter ';', encapsulator '"', comment start
     * set to the NUL character, leading whitespaces not ignored, unicode
     * escapes not interpreted, empty lines not ignored.
     */
    public static final CSVStrategy EXCEL_STRATEGY =
            new CSVStrategy(';', '"', (char) 0, false, false, false);

    /**
     * Creates a strategy with the given special characters and the remaining
     * flags set as in {@link #DEFAULT_STRATEGY}: leading whitespaces ignored,
     * unicode escapes not interpreted, empty lines ignored.
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification
     */
    public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
        this(delimiter, encapsulator, commentStart, true, false, true);
    }

    /**
     * Creates a fully customized CSV strategy.
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification
     * @param ignoreLeadingWhitespace TRUE when leading whitespaces should be
     *                                ignored
     * @param interpretUnicodeEscapes TRUE when unicode escapes should be
     *                                interpreted
     * @param ignoreEmptyLines TRUE when the parser should skip empty lines
     */
    public CSVStrategy(
        char delimiter,
        char encapsulator,
        char commentStart,
        boolean ignoreLeadingWhitespace,
        boolean interpretUnicodeEscapes,
        boolean ignoreEmptyLines)
    {
        // Assign the fields directly: the setters are public and overridable,
        // and must not run against a partially constructed subclass instance.
        this.delimiter = delimiter;
        this.encapsulator = encapsulator;
        this.commentStart = commentStart;
        this.ignoreLeadingWhitespaces = ignoreLeadingWhitespace;
        this.interpretUnicodeEscapes = interpretUnicodeEscapes;
        this.ignoreEmptyLines = ignoreEmptyLines;
    }

    /** @param delimiter a Char used for value separation */
    public void setDelimiter(char delimiter) { this.delimiter = delimiter; }

    /** @return the delimiter character */
    public char getDelimiter() { return this.delimiter; }

    /** @param encapsulator a Char used as value encapsulation marker */
    public void setEncapsulator(char encapsulator) { this.encapsulator = encapsulator; }

    /** @return the encapsulator marker */
    public char getEncapsulator() { return this.encapsulator; }

    /** @param commentStart a Char used for comment identification */
    public void setCommentStart(char commentStart) { this.commentStart = commentStart; }

    /** @return the comment identifier character */
    public char getCommentStart() { return this.commentStart; }

    /** @param ignoreLeadingWhitespaces TRUE when leading whitespaces should be ignored */
    public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) { this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces; }

    /** @return TRUE when leading whitespaces are ignored */
    public boolean getIgnoreLeadingWhitespaces() { return this.ignoreLeadingWhitespaces; }

    /** @param interpretUnicodeEscapes TRUE when unicode escapes should be interpreted */
    public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) { this.interpretUnicodeEscapes = interpretUnicodeEscapes; }

    /** @return TRUE when unicode escape interpretation is enabled */
    public boolean getUnicodeEscapeInterpretation() { return this.interpretUnicodeEscapes; }

    /** @param ignoreEmptyLines TRUE when empty lines in the input should be ignored */
    public void setIgnoreEmptyLines(boolean ignoreEmptyLines) { this.ignoreEmptyLines = ignoreEmptyLines; }

    /** @return TRUE when empty lines in the input are ignored */
    public boolean getIgnoreEmptyLines() { return this.ignoreEmptyLines; }

}

View File

@ -77,67 +77,6 @@ public class CSVParserTest extends TestCase {
}
// ======================================================
// getters / setters
// ======================================================
public void testGetSetCommentStart() {
CSVParser parser = new CSVParser(new StringReader("hello world"));
parser.setCommentStart('#');
assertEquals(parser.getCommentStart(), '#');
parser.setCommentStart('!');
assertEquals(parser.getCommentStart(), '!');
}
public void testGetSetEncapsulator() {
CSVParser parser = new CSVParser(new StringReader("hello world"));
parser.setEncapsulator('"');
assertEquals(parser.getEncapsulator(), '"');
parser.setEncapsulator('\'');
assertEquals(parser.getEncapsulator(), '\'');
}
public void testGetSetDelimiter() {
CSVParser parser = new CSVParser(new StringReader("hello world"));
parser.setDelimiter(';');
assertEquals(parser.getDelimiter(), ';');
parser.setDelimiter(',');
assertEquals(parser.getDelimiter(), ',');
parser.setDelimiter('\t');
assertEquals(parser.getDelimiter(), '\t');
}
public void testSetCSVStrategy() {
CSVParser parser = new CSVParser(new StringReader("hello world"));
// default settings
assertEquals(parser.getDelimiter(), ',');
assertEquals(parser.getEncapsulator(), '"');
assertEquals(parser.getCommentStart(), '\0');
assertEquals(true, parser.getIgnoreLeadingWhitespaces());
assertEquals(false, parser.getUnicodeEscapeInterpretation());
assertEquals(true, parser.getIgnoreEmptyLines());
// explicit csv settings
parser.setCSVStrategy();
assertEquals(parser.getDelimiter(), ',');
assertEquals(parser.getEncapsulator(), '"');
assertEquals(parser.getCommentStart(), '\0');
assertEquals(true, parser.getIgnoreLeadingWhitespaces());
assertEquals(false, parser.getUnicodeEscapeInterpretation());
assertEquals(true, parser.getIgnoreEmptyLines());
}
public void testSetExcelStrategy() {
CSVParser parser = new CSVParser(new StringReader("hello world"));
// explicit Excel settings
parser.setExcelStrategy();
assertEquals(parser.getDelimiter(), ';');
assertEquals(parser.getEncapsulator(), '"');
assertEquals(parser.getCommentStart(), '\0');
assertEquals(false, parser.getIgnoreLeadingWhitespaces());
assertEquals(false, parser.getUnicodeEscapeInterpretation());
assertEquals(false, parser.getIgnoreEmptyLines());
}
// ======================================================
// lexer tests
// ======================================================
@ -146,7 +85,7 @@ public class CSVParserTest extends TestCase {
public void testNextToken1() throws IOException {
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setCSVStrategy();
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
@ -171,9 +110,9 @@ public class CSVParserTest extends TestCase {
*/
String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setIgnoreEmptyLines(false);
parser.setCSVStrategy();
parser.setCommentStart('#');
parser.getStrategy().setIgnoreEmptyLines(false);
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
parser.getStrategy().setCommentStart('#');
System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
@ -198,8 +137,8 @@ public class CSVParserTest extends TestCase {
*/
String code = "a,\\,,b\n\\,,";
TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setCSVStrategy();
parser.setCommentStart('#');
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
parser.getStrategy().setCommentStart('#');
System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
// an unquoted single backslash is not an escape char
@ -222,7 +161,7 @@ public class CSVParserTest extends TestCase {
String code =
"a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setCSVStrategy();
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
@ -246,7 +185,7 @@ public class CSVParserTest extends TestCase {
+ ",\"\\,\""
+ ",\"\"\"\"";
TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setCSVStrategy();
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
@ -269,9 +208,7 @@ public class CSVParserTest extends TestCase {
*/
String code = "a;'b and \\' more\n'\n!comment;;;;\n;;";
TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setDelimiter(';');
parser.setEncapsulator('\'');
parser.setCommentStart('!');
parser.setStrategy( new CSVStrategy(';', '\'', '!') );
System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
assertEquals(
@ -344,7 +281,7 @@ public class CSVParserTest extends TestCase {
{"\"hello\"", " \"world\"", "abc\ndef", ""}
};
CSVParser parser = new CSVParser(new StringReader(code));
parser.setExcelStrategy();
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
@ -364,7 +301,7 @@ public class CSVParserTest extends TestCase {
{"world", ""}
};
CSVParser parser = new CSVParser(new StringReader(code));
parser.setExcelStrategy();
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
@ -397,7 +334,7 @@ public class CSVParserTest extends TestCase {
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex];
CSVParser parser = new CSVParser(new StringReader(code));
parser.setExcelStrategy();
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
@ -430,7 +367,7 @@ public class CSVParserTest extends TestCase {
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex];
CSVParser parser = new CSVParser(new StringReader(code));
parser.setCSVStrategy();
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
@ -460,7 +397,7 @@ public class CSVParserTest extends TestCase {
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex];
CSVParser parser = new CSVParser(new StringReader(code));
parser.setExcelStrategy();
parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
@ -488,7 +425,7 @@ public class CSVParserTest extends TestCase {
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex];
CSVParser parser = new CSVParser(new StringReader(code));
parser.setCSVStrategy();
parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
@ -663,7 +600,7 @@ public class CSVParserTest extends TestCase {
String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
CSVParser parser = new CSVParser(new StringReader(code));
System.out.println("---------\n" + code + "\n-------------");
parser.setUnicodeEscapeInterpretation(true);
parser.getStrategy().setUnicodeEscapeInterpretation(true);
String[] data = parser.getLine();
assertEquals(2, data.length);
assertEquals("abc", data[0]);

View File

@ -0,0 +1,117 @@
/*
* Copyright 2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
/**
 * CSVStrategyTest
 *
 * Tests the getters and setters of {@link CSVStrategy}, the strategy a
 * {@link CSVParser} starts out with, and the values of the predefined
 * default and Excel strategies.
 */
public class CSVStrategyTest extends TestCase {

    /**
     * Constructor for JUnit.
     * @param name Name to be used in JUnit Test Environment
     */
    public CSVStrategyTest(String name) {
        super(name);
    }

    /**
     * Returns a Test suite for JUnit.
     * @return Test suite for JUnit
     */
    public static Test suite() {
        return new TestSuite(CSVStrategyTest.class);
    }

    // ======================================================
    //   getters / setters
    // ======================================================

    public void testGetSetCommentStart() {
        CSVParser parser = new CSVParser(new StringReader("hello world"));
        CSVStrategy strategy = parser.getStrategy();
        strategy.setCommentStart('#');
        assertEquals('#', strategy.getCommentStart());
        strategy.setCommentStart('!');
        assertEquals('!', strategy.getCommentStart());
    }

    public void testGetSetEncapsulator() {
        CSVParser parser = new CSVParser(new StringReader("hello world"));
        CSVStrategy strategy = parser.getStrategy();
        strategy.setEncapsulator('"');
        assertEquals('"', strategy.getEncapsulator());
        strategy.setEncapsulator('\'');
        assertEquals('\'', strategy.getEncapsulator());
    }

    public void testGetSetDelimiter() {
        CSVParser parser = new CSVParser(new StringReader("hello world"));
        CSVStrategy strategy = parser.getStrategy();
        strategy.setDelimiter(';');
        assertEquals(';', strategy.getDelimiter());
        strategy.setDelimiter(',');
        assertEquals(',', strategy.getDelimiter());
        strategy.setDelimiter('\t');
        assertEquals('\t', strategy.getDelimiter());
    }

    public void testSetCSVStrategy() {
        CSVParser parser = new CSVParser(new StringReader("hello world"));
        CSVStrategy strategy = parser.getStrategy();
        // default settings
        assertEquals(',', strategy.getDelimiter());
        assertEquals('"', strategy.getEncapsulator());
        assertEquals('\0', strategy.getCommentStart());
        assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
        assertEquals(false, strategy.getUnicodeEscapeInterpretation());
        assertEquals(true, strategy.getIgnoreEmptyLines());
        // explicit csv settings
        parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
        // Re-read the strategy from the parser: the local variable above still
        // points at the instance the parser was created with.
        strategy = parser.getStrategy();
        assertSame(CSVStrategy.DEFAULT_STRATEGY, strategy);
        // DEFAULT_STRATEGY is a shared mutable instance, so the checks below
        // assume no other code has modified it through its setters.
        assertEquals(',', strategy.getDelimiter());
        assertEquals('"', strategy.getEncapsulator());
        assertEquals('\0', strategy.getCommentStart());
        assertEquals(true, strategy.getIgnoreLeadingWhitespaces());
        assertEquals(false, strategy.getUnicodeEscapeInterpretation());
        assertEquals(true, strategy.getIgnoreEmptyLines());
    }

    public void testSetExcelStrategy() {
        CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY;
        assertEquals(';', strategy.getDelimiter());
        assertEquals('"', strategy.getEncapsulator());
        assertEquals('\0', strategy.getCommentStart());
        assertEquals(false, strategy.getIgnoreLeadingWhitespaces());
        assertEquals(false, strategy.getUnicodeEscapeInterpretation());
        assertEquals(false, strategy.getIgnoreEmptyLines());
    }

}