Extracted the strategy concept into its own class

git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/sandbox/csv/trunk@399987 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Henri Yandell 2006-05-05 06:24:34 +00:00
parent f047581f95
commit eac54a225b
4 changed files with 249 additions and 298 deletions

View File

@ -65,17 +65,11 @@ public class CSVParser {
/** Token with content when end of a line is reached. */ /** Token with content when end of a line is reached. */
protected static final int TT_EORECORD = 2; protected static final int TT_EORECORD = 2;
// the csv definition
private char delimiter;
private char encapsulator;
private char commentStart;
private boolean ignoreLeadingWhitespaces;
private boolean interpretUnicodeEscapes;
private boolean ignoreEmptyLines;
// the input stream // the input stream
private ExtendedBufferedReader in; private ExtendedBufferedReader in;
private CSVStrategy strategy;
/** /**
* Token is an internal token representation. * Token is an internal token representation.
* *
@ -106,7 +100,7 @@ public class CSVParser {
* @param s CSV String to be parsed. * @param s CSV String to be parsed.
* @return parsed String matrix (which is never null) * @return parsed String matrix (which is never null)
* @throws IOException in case of error * @throws IOException in case of error
* @see #setCSVStrategy() * @see #setStrategy()
*/ */
public static String[][] parse(String s) throws IOException { public static String[][] parse(String s) throws IOException {
if (s == null) { if (s == null) {
@ -130,7 +124,7 @@ public class CSVParser {
* @param s CSV String to be parsed. * @param s CSV String to be parsed.
* @return parsed String vector (which is never null) * @return parsed String vector (which is never null)
* @throws IOException in case of error * @throws IOException in case of error
* @see #setCSVStrategy() * @see #setStrategy()
*/ */
public static String[] parseLine(String s) throws IOException { public static String[] parseLine(String s) throws IOException {
if (s == null) { if (s == null) {
@ -151,7 +145,7 @@ public class CSVParser {
* Default strategy for the parser follows the default CSV Strategy. * Default strategy for the parser follows the default CSV Strategy.
* *
* @param input an InputStream containing "csv-formatted" stream * @param input an InputStream containing "csv-formatted" stream
* @see #setCSVStrategy() * @see #setStrategy()
*/ */
public CSVParser(InputStream input) { public CSVParser(InputStream input) {
this(new InputStreamReader(input)); this(new InputStreamReader(input));
@ -161,7 +155,7 @@ public class CSVParser {
* Default strategy for the parser follows the default CSV Strategy. * Default strategy for the parser follows the default CSV Strategy.
* *
* @param input a Reader based on "csv-formatted" input * @param input a Reader based on "csv-formatted" input
* @see #setCSVStrategy() * @see #setStrategy()
*/ */
public CSVParser(Reader input) { public CSVParser(Reader input) {
// note: must match default-CSV-strategy !! // note: must match default-CSV-strategy !!
@ -172,7 +166,7 @@ public class CSVParser {
* Customized value delimiter parser. * Customized value delimiter parser.
* *
* The parser follows the default CSV strategy as defined in * The parser follows the default CSV strategy as defined in
* {@link #setCSVStrategy()} except for the delimiter setting. * {@link #setStrategy()} except for the delimiter setting.
* *
* @param input a Reader based on "csv-formatted" input * @param input a Reader based on "csv-formatted" input
* @param delimiter a Char used for value separation * @param delimiter a Char used for value separation
@ -193,18 +187,9 @@ public class CSVParser {
* @param encapsulator a Char used as value encapsulation marker * @param encapsulator a Char used as value encapsulation marker
* @param commentStart a Char used for comment identification * @param commentStart a Char used for comment identification
*/ */
public CSVParser( public CSVParser(Reader input, char delimiter, char encapsulator, char commentStart) {
Reader input,
char delimiter,
char encapsulator,
char commentStart) {
this.in = new ExtendedBufferedReader(input); this.in = new ExtendedBufferedReader(input);
this.setDelimiter(delimiter); this.strategy = new CSVStrategy(delimiter, encapsulator, commentStart);
this.setEncapsulator(encapsulator);
this.setCommentStart(commentStart);
this.setIgnoreLeadingWhitespaces(true);
this.setUnicodeEscapeInterpretation(false);
this.setIgnoreEmptyLines(true);
} }
// ====================================================== // ======================================================
@ -350,7 +335,7 @@ public class CSVParser {
c = in.readAgain(); c = in.readAgain();
// empty line detection: eol AND (last char was EOL or beginning) // empty line detection: eol AND (last char was EOL or beginning)
while (ignoreEmptyLines && eol while (strategy.getIgnoreEmptyLines() && eol
&& (lastChar == '\n' && (lastChar == '\n'
|| lastChar == ExtendedBufferedReader.UNDEFINED) || lastChar == ExtendedBufferedReader.UNDEFINED)
&& !isEndOfFile(lastChar)) { && !isEndOfFile(lastChar)) {
@ -367,7 +352,7 @@ public class CSVParser {
} }
// did we reached eof during the last iteration already ? TT_EOF // did we reached eof during the last iteration already ? TT_EOF
if (isEndOfFile(lastChar) || (lastChar != delimiter && isEndOfFile(c))) { if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) {
tkn.type = TT_EOF; tkn.type = TT_EOF;
return tkn; return tkn;
} }
@ -381,11 +366,11 @@ public class CSVParser {
eol = isEndOfLine(c); eol = isEndOfLine(c);
} }
// ok, start of token reached: comment, encapsulated, or token // ok, start of token reached: comment, encapsulated, or token
if (c == commentStart) { if (c == strategy.getCommentStart()) {
// ignore everything till end of line and continue (incr linecount) // ignore everything till end of line and continue (incr linecount)
in.readLine(); in.readLine();
tkn = nextToken(); tkn = nextToken();
} else if (c == delimiter) { } else if (c == strategy.getDelimiter()) {
// empty token return TT_TOKEN("") // empty token return TT_TOKEN("")
tkn.type = TT_TOKEN; tkn.type = TT_TOKEN;
tkn.isReady = true; tkn.isReady = true;
@ -394,7 +379,7 @@ public class CSVParser {
tkn.content.append(""); tkn.content.append("");
tkn.type = TT_EORECORD; tkn.type = TT_EORECORD;
tkn.isReady = true; tkn.isReady = true;
} else if (c == encapsulator) { } else if (c == strategy.getEncapsulator()) {
// consume encapsulated token // consume encapsulated token
encapsulatedTokenLexer(tkn, c); encapsulatedTokenLexer(tkn, c);
} else if (isEndOfFile(c)) { } else if (isEndOfFile(c)) {
@ -405,7 +390,7 @@ public class CSVParser {
} else { } else {
// next token must be a simple token // next token must be a simple token
// add removed blanks when not ignoring whitespace chars... // add removed blanks when not ignoring whitespace chars...
if (!this.ignoreLeadingWhitespaces) { if (!strategy.getIgnoreLeadingWhitespaces()) {
tkn.content.append(wsBuf.toString()); tkn.content.append(wsBuf.toString());
} }
simpleTokenLexer(tkn, c); simpleTokenLexer(tkn, c);
@ -443,11 +428,11 @@ public class CSVParser {
// end of file // end of file
tkn.type = TT_EOF; tkn.type = TT_EOF;
tkn.isReady = true; tkn.isReady = true;
} else if (c == delimiter) { } else if (c == strategy.getDelimiter()) {
// end of token // end of token
tkn.type = TT_TOKEN; tkn.type = TT_TOKEN;
tkn.isReady = true; tkn.isReady = true;
} else if (c == '\\' && interpretUnicodeEscapes && in.lookAhead() == 'u') { } else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
// interpret unicode escaped chars (like \u0070 -> p) // interpret unicode escaped chars (like \u0070 -> p)
tkn.content.append((char) unicodeEscapeLexer(c)); tkn.content.append((char) unicodeEscapeLexer(c));
} else if (isWhitespace(c)) { } else if (isWhitespace(c)) {
@ -493,9 +478,9 @@ public class CSVParser {
// assert c == delimiter; // assert c == delimiter;
c = in.read(); c = in.read();
while (!tkn.isReady) { while (!tkn.isReady) {
if (c == encapsulator || c == '\\') { if (c == strategy.getEncapsulator() || c == '\\') {
// check lookahead // check lookahead
if (in.lookAhead() == encapsulator) { if (in.lookAhead() == strategy.getEncapsulator()) {
// double or escaped encapsulator -> add single encapsulator to token // double or escaped encapsulator -> add single encapsulator to token
c = in.read(); c = in.read();
tkn.content.append((char) c); tkn.content.append((char) c);
@ -506,7 +491,7 @@ public class CSVParser {
c = in.read(); c = in.read();
tkn.content.append((char) c); tkn.content.append((char) c);
} else if ( } else if (
interpretUnicodeEscapes strategy.getUnicodeEscapeInterpretation()
&& c == '\\' && c == '\\'
&& in.lookAhead() == 'u') { && in.lookAhead() == 'u') {
// interpret unicode escaped chars (like \u0070 -> p) // interpret unicode escaped chars (like \u0070 -> p)
@ -518,7 +503,7 @@ public class CSVParser {
// token finish mark (encapsulator) reached: ignore whitespace till delimiter // token finish mark (encapsulator) reached: ignore whitespace till delimiter
while (!tkn.isReady) { while (!tkn.isReady) {
int n = in.lookAhead(); int n = in.lookAhead();
if (n == delimiter) { if (n == strategy.getDelimiter()) {
tkn.type = TT_TOKEN; tkn.type = TT_TOKEN;
tkn.isReady = true; tkn.isReady = true;
} else if (isEndOfFile(n)) { } else if (isEndOfFile(n)) {
@ -589,201 +574,26 @@ public class CSVParser {
} }
// ====================================================== // ======================================================
// strategy utilities // strategies
// ====================================================== // ======================================================
/** /**
* Sets the "Default CSV" settings. * Sets the specified CSV Strategy
* *
* The default csv settings are relatively restrictive but implement
* something like the "least-common-basis" of CSV:
* <ul>
* <li> Delimiter of values is comma ',' (as the C in "CSV") </li>
* <li> Complex values encapsulated by '"' </li>
* <li> Comments are not supported </li>
* <li> Leading whitespaces are ignored </li>
* <li> Unicode escapes are not interpreted </li>
* <li> empty lines are skiped </li>
* </ul>
* @return current instance of CSVParser to allow chained method calls * @return current instance of CSVParser to allow chained method calls
*/ */
public CSVParser setCSVStrategy() { public CSVParser setStrategy(CSVStrategy strategy) {
setStrategy(',', '"', (char) 0, true, false, true); this.strategy = strategy;
return this; return this;
} }
/** /**
* Sets the "Excel CSV" settings. There are companies out there which * Obtain the specified CSV Strategy
* interpret "C" as an abbreviation for "Semicolon". For these companies the
* following settings might be appropriate:
* <ul>
* <li> Delimiter of values is semicolon ';' </li>
* <li> Complex values encapsulated by '"' </li>
* <li> Comments are not supported </li>
* <li> Leading whitespaces are not ignored </li>
* <li> Unicode escapes are not interpreted </li>
* <li> empty lines are not skiped </li>
* </ul>
* *
* @return current instance of CSVParser to allow chained method calls * @return strategy currently being used
*/ */
public CSVParser setExcelStrategy() { public CSVStrategy getStrategy() {
setStrategy(';', '"', (char) 0, false, false, false); return this.strategy;
return this;
}
/**
* Customized CSV strategy setter.
*
* @param delimiter a Char used for value separation
* @param encapsulator a Char used as value encapsulation marker
* @param commentStart a Char used for comment identification
* @param ignoreLeadingWhitespace TRUE when leading whitespaces should be
* ignored
* @param interpretUnicodeEscapes TRUE when unicode escapes should be
* interpreted
* @param ignoreEmptyLines TRUE when the parser should skip emtpy lines
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setStrategy(
char delimiter,
char encapsulator,
char commentStart,
boolean ignoreLeadingWhitespace,
boolean interpretUnicodeEscapes,
boolean ignoreEmptyLines) {
this.setDelimiter(delimiter);
this.setEncapsulator(encapsulator);
this.setCommentStart(commentStart);
this.setIgnoreLeadingWhitespaces(ignoreLeadingWhitespace);
this.setUnicodeEscapeInterpretation(interpretUnicodeEscapes);
this.setIgnoreEmptyLines(ignoreEmptyLines);
return this;
}
/**
* Set the desired delimiter.
*
* @param c a Char used for value separation
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setDelimiter(char c) {
this.delimiter = c;
return this;
}
/**
* Gets the delimiter.
*
* @return the delimiter character
*/
public char getDelimiter() {
return this.delimiter;
}
/**
* Set the desired encapsulator.
*
* @param c a Char used as value encapsulation marker
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setEncapsulator(char c) {
this.encapsulator = c;
return this;
}
/**
* Gets the encapsulator character.
*
* @return the encapsulator marker
*/
public char getEncapsulator() {
return this.encapsulator;
}
/**
* Set the desired comment start character.
*
* @param c a Char used for comment identification
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setCommentStart(char c) {
this.commentStart = c;
return this;
}
/**
* Gets the comment identifier.
*
* @return the comment identifier character
*/
public char getCommentStart() {
return this.commentStart;
}
/**
* Enables unicode escape interpretation.
*
* @param b TRUE when interpretation should be enabled
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setUnicodeEscapeInterpretation(boolean b) {
this.interpretUnicodeEscapes = b;
return this;
}
/**
* Shows wether unicode interpretation is enabled.
*
* @return TRUE when unicode interpretation is enabled
*/
public boolean getUnicodeEscapeInterpretation() {
return this.interpretUnicodeEscapes;
}
/**
* Sets the ignore-leading-whitespaces behaviour.
*
* Should the lexer ignore leading whitespaces when parsing non
* encapsulated tokens.
*
* @param b TRUE when leading whitespaces should be ignored
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setIgnoreLeadingWhitespaces(boolean b) {
this.ignoreLeadingWhitespaces = b;
return this;
}
/**
* Shows whether unicode interpretation is enabled.
*
* @return TRUE when unicode interpretation is enabled
*/
public boolean getIgnoreLeadingWhitespaces() {
return this.ignoreLeadingWhitespaces;
}
/**
* Sets the ignore-empty-line behaviour.
*
* When set to 'true' empty lines in the input will be ignored.
*
* @param b TRUE when empty lines in the input should be ignored
* @return current instance of CSVParser to allow chained method calls
*/
public CSVParser setIgnoreEmptyLines(boolean b) {
this.ignoreEmptyLines = b;
return this;
}
/**
* Shows whether empty lines in the input are ignored.
*
* @return TRUE when empty lines in the input are ignored
*/
public boolean getIgnoreEmptyLines() {
return this.ignoreEmptyLines;
} }
// ====================================================== // ======================================================

View File

@ -0,0 +1,87 @@
/*
* Copyright 2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
/**
 * CSVStrategy
 *
 * Represents the strategy for a CSV: the delimiter, encapsulator and
 * comment-start characters plus three boolean switches (ignore leading
 * whitespaces, interpret unicode escapes, ignore empty lines).
 *
 * Instances are mutable through the setters below. This includes the shared
 * {@link #DEFAULT_STRATEGY} and {@link #EXCEL_STRATEGY} instances: calling a
 * setter on one of them changes it for every user of that constant, so
 * create a fresh instance when a customized strategy is needed.
 */
public class CSVStrategy {

    private char delimiter;
    private char encapsulator;
    private char commentStart;
    private boolean ignoreLeadingWhitespaces;
    private boolean interpretUnicodeEscapes;
    private boolean ignoreEmptyLines;

    /**
     * Default strategy: delimiter ',', encapsulator '"', comment start
     * {@code (char) 0}, leading whitespaces ignored, unicode escapes not
     * interpreted, empty lines ignored.
     */
    public static final CSVStrategy DEFAULT_STRATEGY =
            new CSVStrategy(',', '"', (char) 0, true, false, true);

    /**
     * Excel strategy: delimiter ';', encapsulator '"', comment start
     * {@code (char) 0}, leading whitespaces not ignored, unicode escapes not
     * interpreted, empty lines not ignored.
     */
    public static final CSVStrategy EXCEL_STRATEGY =
            new CSVStrategy(';', '"', (char) 0, false, false, false);

    /**
     * Creates a strategy with the given special characters. The boolean
     * switches take the same values as in {@link #DEFAULT_STRATEGY}: leading
     * whitespaces ignored, unicode escapes not interpreted, empty lines
     * ignored.
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification
     */
    public CSVStrategy(char delimiter, char encapsulator, char commentStart) {
        this(delimiter, encapsulator, commentStart, true, false, true);
    }

    /**
     * Creates a fully customized CSV strategy.
     *
     * @param delimiter a Char used for value separation
     * @param encapsulator a Char used as value encapsulation marker
     * @param commentStart a Char used for comment identification
     * @param ignoreLeadingWhitespace TRUE when leading whitespaces should be
     *      ignored
     * @param interpretUnicodeEscapes TRUE when unicode escapes should be
     *      interpreted
     * @param ignoreEmptyLines TRUE when the parser should skip empty lines
     */
    public CSVStrategy(
        char delimiter,
        char encapsulator,
        char commentStart,
        boolean ignoreLeadingWhitespace,
        boolean interpretUnicodeEscapes,
        boolean ignoreEmptyLines)
    {
        // Assign the fields directly instead of going through the public
        // setters: those are overridable, and an override would otherwise
        // run before the subclass has finished initializing.
        this.delimiter = delimiter;
        this.encapsulator = encapsulator;
        this.commentStart = commentStart;
        this.ignoreLeadingWhitespaces = ignoreLeadingWhitespace;
        this.interpretUnicodeEscapes = interpretUnicodeEscapes;
        this.ignoreEmptyLines = ignoreEmptyLines;
    }

    /** @param delimiter a Char used for value separation */
    public void setDelimiter(char delimiter) { this.delimiter = delimiter; }

    /** @return the delimiter character */
    public char getDelimiter() { return this.delimiter; }

    /** @param encapsulator a Char used as value encapsulation marker */
    public void setEncapsulator(char encapsulator) { this.encapsulator = encapsulator; }

    /** @return the encapsulator marker */
    public char getEncapsulator() { return this.encapsulator; }

    /** @param commentStart a Char used for comment identification */
    public void setCommentStart(char commentStart) { this.commentStart = commentStart; }

    /** @return the comment identifier character */
    public char getCommentStart() { return this.commentStart; }

    /** @param ignoreLeadingWhitespaces TRUE when leading whitespaces should be ignored */
    public void setIgnoreLeadingWhitespaces(boolean ignoreLeadingWhitespaces) { this.ignoreLeadingWhitespaces = ignoreLeadingWhitespaces; }

    /** @return TRUE when leading whitespaces are ignored */
    public boolean getIgnoreLeadingWhitespaces() { return this.ignoreLeadingWhitespaces; }

    /** @param interpretUnicodeEscapes TRUE when unicode escapes should be interpreted */
    public void setUnicodeEscapeInterpretation(boolean interpretUnicodeEscapes) { this.interpretUnicodeEscapes = interpretUnicodeEscapes; }

    /** @return TRUE when unicode escape interpretation is enabled */
    public boolean getUnicodeEscapeInterpretation() { return this.interpretUnicodeEscapes; }

    /** @param ignoreEmptyLines TRUE when empty lines in the input should be ignored */
    public void setIgnoreEmptyLines(boolean ignoreEmptyLines) { this.ignoreEmptyLines = ignoreEmptyLines; }

    /** @return TRUE when empty lines in the input are ignored */
    public boolean getIgnoreEmptyLines() { return this.ignoreEmptyLines; }
}

View File

@ -77,67 +77,6 @@ public class CSVParserTest extends TestCase {
} }
// ======================================================
// getters / setters
// ======================================================
public void testGetSetCommentStart() {
CSVParser parser = new CSVParser(new StringReader("hello world"));
parser.setCommentStart('#');
assertEquals(parser.getCommentStart(), '#');
parser.setCommentStart('!');
assertEquals(parser.getCommentStart(), '!');
}
public void testGetSetEncapsulator() {
CSVParser parser = new CSVParser(new StringReader("hello world"));
parser.setEncapsulator('"');
assertEquals(parser.getEncapsulator(), '"');
parser.setEncapsulator('\'');
assertEquals(parser.getEncapsulator(), '\'');
}
public void testGetSetDelimiter() {
CSVParser parser = new CSVParser(new StringReader("hello world"));
parser.setDelimiter(';');
assertEquals(parser.getDelimiter(), ';');
parser.setDelimiter(',');
assertEquals(parser.getDelimiter(), ',');
parser.setDelimiter('\t');
assertEquals(parser.getDelimiter(), '\t');
}
public void testSetCSVStrategy() {
CSVParser parser = new CSVParser(new StringReader("hello world"));
// default settings
assertEquals(parser.getDelimiter(), ',');
assertEquals(parser.getEncapsulator(), '"');
assertEquals(parser.getCommentStart(), '\0');
assertEquals(true, parser.getIgnoreLeadingWhitespaces());
assertEquals(false, parser.getUnicodeEscapeInterpretation());
assertEquals(true, parser.getIgnoreEmptyLines());
// explicit csv settings
parser.setCSVStrategy();
assertEquals(parser.getDelimiter(), ',');
assertEquals(parser.getEncapsulator(), '"');
assertEquals(parser.getCommentStart(), '\0');
assertEquals(true, parser.getIgnoreLeadingWhitespaces());
assertEquals(false, parser.getUnicodeEscapeInterpretation());
assertEquals(true, parser.getIgnoreEmptyLines());
}
public void testSetExcelStrategy() {
CSVParser parser = new CSVParser(new StringReader("hello world"));
// explicit Excel settings
parser.setExcelStrategy();
assertEquals(parser.getDelimiter(), ';');
assertEquals(parser.getEncapsulator(), '"');
assertEquals(parser.getCommentStart(), '\0');
assertEquals(false, parser.getIgnoreLeadingWhitespaces());
assertEquals(false, parser.getUnicodeEscapeInterpretation());
assertEquals(false, parser.getIgnoreEmptyLines());
}
// ====================================================== // ======================================================
// lexer tests // lexer tests
// ====================================================== // ======================================================
@ -146,7 +85,7 @@ public class CSVParserTest extends TestCase {
public void testNextToken1() throws IOException { public void testNextToken1() throws IOException {
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,"; String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
TestCSVParser parser = new TestCSVParser(new StringReader(code)); TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setCSVStrategy(); parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
@ -171,9 +110,9 @@ public class CSVParserTest extends TestCase {
*/ */
String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n"; String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
TestCSVParser parser = new TestCSVParser(new StringReader(code)); TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setIgnoreEmptyLines(false); parser.getStrategy().setIgnoreEmptyLines(false);
parser.setCSVStrategy(); parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
parser.setCommentStart('#'); parser.getStrategy().setCommentStart('#');
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
@ -198,8 +137,8 @@ public class CSVParserTest extends TestCase {
*/ */
String code = "a,\\,,b\n\\,,"; String code = "a,\\,,b\n\\,,";
TestCSVParser parser = new TestCSVParser(new StringReader(code)); TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setCSVStrategy(); parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
parser.setCommentStart('#'); parser.getStrategy().setCommentStart('#');
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
// an unquoted single backslash is not an escape char // an unquoted single backslash is not an escape char
@ -222,7 +161,7 @@ public class CSVParserTest extends TestCase {
String code = String code =
"a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b"; "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
TestCSVParser parser = new TestCSVParser(new StringReader(code)); TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setCSVStrategy(); parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
@ -246,7 +185,7 @@ public class CSVParserTest extends TestCase {
+ ",\"\\,\"" + ",\"\\,\""
+ ",\"\"\"\""; + ",\"\"\"\"";
TestCSVParser parser = new TestCSVParser(new StringReader(code)); TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setCSVStrategy(); parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
@ -269,9 +208,7 @@ public class CSVParserTest extends TestCase {
*/ */
String code = "a;'b and \\' more\n'\n!comment;;;;\n;;"; String code = "a;'b and \\' more\n'\n!comment;;;;\n;;";
TestCSVParser parser = new TestCSVParser(new StringReader(code)); TestCSVParser parser = new TestCSVParser(new StringReader(code));
parser.setDelimiter(';'); parser.setStrategy( new CSVStrategy(';', '\'', '!') );
parser.setEncapsulator('\'');
parser.setCommentStart('!');
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken()); assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
assertEquals( assertEquals(
@ -344,7 +281,7 @@ public class CSVParserTest extends TestCase {
{"\"hello\"", " \"world\"", "abc\ndef", ""} {"\"hello\"", " \"world\"", "abc\ndef", ""}
}; };
CSVParser parser = new CSVParser(new StringReader(code)); CSVParser parser = new CSVParser(new StringReader(code));
parser.setExcelStrategy(); parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues(); String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length); assertEquals(res.length, tmp.length);
@ -364,7 +301,7 @@ public class CSVParserTest extends TestCase {
{"world", ""} {"world", ""}
}; };
CSVParser parser = new CSVParser(new StringReader(code)); CSVParser parser = new CSVParser(new StringReader(code));
parser.setExcelStrategy(); parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues(); String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length); assertEquals(res.length, tmp.length);
@ -397,7 +334,7 @@ public class CSVParserTest extends TestCase {
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex]; code = codes[codeIndex];
CSVParser parser = new CSVParser(new StringReader(code)); CSVParser parser = new CSVParser(new StringReader(code));
parser.setExcelStrategy(); parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues(); String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length); assertEquals(res.length, tmp.length);
@ -430,7 +367,7 @@ public class CSVParserTest extends TestCase {
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex]; code = codes[codeIndex];
CSVParser parser = new CSVParser(new StringReader(code)); CSVParser parser = new CSVParser(new StringReader(code));
parser.setCSVStrategy(); parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues(); String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length); assertEquals(res.length, tmp.length);
@ -460,7 +397,7 @@ public class CSVParserTest extends TestCase {
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex]; code = codes[codeIndex];
CSVParser parser = new CSVParser(new StringReader(code)); CSVParser parser = new CSVParser(new StringReader(code));
parser.setExcelStrategy(); parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues(); String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length); assertEquals(res.length, tmp.length);
@ -488,7 +425,7 @@ public class CSVParserTest extends TestCase {
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) { for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex]; code = codes[codeIndex];
CSVParser parser = new CSVParser(new StringReader(code)); CSVParser parser = new CSVParser(new StringReader(code));
parser.setCSVStrategy(); parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
String[][] tmp = parser.getAllValues(); String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length); assertEquals(res.length, tmp.length);
@ -663,7 +600,7 @@ public class CSVParserTest extends TestCase {
String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063"; String code = "abc,\\u0070\\u0075\\u0062\\u006C\\u0069\\u0063";
CSVParser parser = new CSVParser(new StringReader(code)); CSVParser parser = new CSVParser(new StringReader(code));
System.out.println("---------\n" + code + "\n-------------"); System.out.println("---------\n" + code + "\n-------------");
parser.setUnicodeEscapeInterpretation(true); parser.getStrategy().setUnicodeEscapeInterpretation(true);
String[] data = parser.getLine(); String[] data = parser.getLine();
assertEquals(2, data.length); assertEquals(2, data.length);
assertEquals("abc", data[0]); assertEquals("abc", data[0]);

View File

@ -0,0 +1,117 @@
/*
* Copyright 2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
/**
 * CSVStrategyTest
 *
 * Tests for {@link CSVStrategy}: the getter/setter pairs for the special
 * characters, the settings a freshly constructed {@link CSVParser} starts
 * with, and the values of the predefined default and Excel strategies.
 */
public class CSVStrategyTest extends TestCase {

    /**
     * Constructor for JUnit.
     * @param name Name to be used in JUnit Test Environment
     */
    public CSVStrategyTest(String name) {
        super(name);
    }

    /**
     * Returns a Test suite for JUnit.
     * @return Test suite for JUnit
     */
    public static Test suite() {
        return new TestSuite(CSVStrategyTest.class);
    }

    // ======================================================
    //   getters / setters
    // ======================================================

    /** The comment start character can be set and read back. */
    public void testGetSetCommentStart() {
        CSVParser parser = new CSVParser(new StringReader("hello world"));
        CSVStrategy strategy = parser.getStrategy();
        strategy.setCommentStart('#');
        assertEquals('#', strategy.getCommentStart());
        strategy.setCommentStart('!');
        assertEquals('!', strategy.getCommentStart());
    }

    /** The encapsulator character can be set and read back. */
    public void testGetSetEncapsulator() {
        CSVParser parser = new CSVParser(new StringReader("hello world"));
        CSVStrategy strategy = parser.getStrategy();
        strategy.setEncapsulator('"');
        assertEquals('"', strategy.getEncapsulator());
        strategy.setEncapsulator('\'');
        assertEquals('\'', strategy.getEncapsulator());
    }

    /** The delimiter character can be set and read back. */
    public void testGetSetDelimiter() {
        CSVParser parser = new CSVParser(new StringReader("hello world"));
        CSVStrategy strategy = parser.getStrategy();
        strategy.setDelimiter(';');
        assertEquals(';', strategy.getDelimiter());
        strategy.setDelimiter(',');
        assertEquals(',', strategy.getDelimiter());
        strategy.setDelimiter('\t');
        assertEquals('\t', strategy.getDelimiter());
    }

    /**
     * A new parser starts with the default settings, and explicitly
     * installing the default strategy yields the same settings.
     */
    public void testSetCSVStrategy() {
        CSVParser parser = new CSVParser(new StringReader("hello world"));
        CSVStrategy strategy = parser.getStrategy();
        // default settings
        assertEquals(',', strategy.getDelimiter());
        assertEquals('"', strategy.getEncapsulator());
        assertEquals('\0', strategy.getCommentStart());
        assertEquals(true,  strategy.getIgnoreLeadingWhitespaces());
        assertEquals(false, strategy.getUnicodeEscapeInterpretation());
        assertEquals(true,  strategy.getIgnoreEmptyLines());
        // explicit csv settings
        parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
        // Re-read the strategy from the parser: the local variable above
        // still points at the instance the parser was constructed with, so
        // asserting on it would not exercise the newly installed strategy.
        strategy = parser.getStrategy();
        assertSame(CSVStrategy.DEFAULT_STRATEGY, strategy);
        assertEquals(',', strategy.getDelimiter());
        assertEquals('"', strategy.getEncapsulator());
        assertEquals('\0', strategy.getCommentStart());
        assertEquals(true,  strategy.getIgnoreLeadingWhitespaces());
        assertEquals(false, strategy.getUnicodeEscapeInterpretation());
        assertEquals(true,  strategy.getIgnoreEmptyLines());
    }

    /** The predefined Excel strategy carries the expected settings. */
    public void testSetExcelStrategy() {
        CSVStrategy strategy = CSVStrategy.EXCEL_STRATEGY;
        assertEquals(';', strategy.getDelimiter());
        assertEquals('"', strategy.getEncapsulator());
        assertEquals('\0', strategy.getCommentStart());
        assertEquals(false, strategy.getIgnoreLeadingWhitespaces());
        assertEquals(false, strategy.getUnicodeEscapeInterpretation());
        assertEquals(false, strategy.getIgnoreEmptyLines());
    }

}